diff --git ql/src/test/queries/clientnegative/columnstats_partlvl_invalid_values.q ql/src/test/queries/clientnegative/columnstats_partlvl_invalid_values.q index 712ece7b7d24218e61aaaab909c9eb2025ce0965..8521631e557dc772ff604b31f06c60bc61af10d8 100644 --- ql/src/test/queries/clientnegative/columnstats_partlvl_invalid_values.q +++ ql/src/test/queries/clientnegative/columnstats_partlvl_invalid_values.q @@ -1,4 +1,3 @@ --- JAVA_VERSION_SPECIFIC_OUTPUT DROP TABLE Employee_Part; diff --git ql/src/test/queries/clientpositive/authorization_explain.q ql/src/test/queries/clientpositive/authorization_explain.q index 6a9475cfc011e0e2403517b7a67c09129ed19569..d4297043e57244cf797d353089c83b12bfb47df6 100644 --- ql/src/test/queries/clientpositive/authorization_explain.q +++ ql/src/test/queries/clientpositive/authorization_explain.q @@ -2,7 +2,6 @@ set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.autho set hive.mapred.mode=nonstrict; set hive.security.authorization.enabled=true; --- JAVA_VERSION_SPECIFIC_OUTPUT explain authorization select * from src join srcpart; explain formatted authorization select * from src join srcpart; diff --git ql/src/test/queries/clientpositive/avro_date.q ql/src/test/queries/clientpositive/avro_date.q index 15c07de0c256a6e317ab5386e90eaf8ee3b9b8fd..716982218267edb349b7a0b2a62cbbbbbbd492ee 100644 --- ql/src/test/queries/clientpositive/avro_date.q +++ ql/src/test/queries/clientpositive/avro_date.q @@ -1,5 +1,4 @@ set hive.mapred.mode=nonstrict; --- JAVA_VERSION_SPECIFIC_OUTPUT DROP TABLE avro_date_staging; DROP TABLE avro_date; diff --git ql/src/test/queries/clientpositive/avro_deserialize_map_null.q ql/src/test/queries/clientpositive/avro_deserialize_map_null.q index 962e649d2cd5149aec24d2020bbce986e321cc60..42258d9a025fc2835fbc5eddef8cebb894358789 100644 --- ql/src/test/queries/clientpositive/avro_deserialize_map_null.q +++ ql/src/test/queries/clientpositive/avro_deserialize_map_null.q @@ -4,7 +4,6 @@ -- fileSchema = [{ "type" : "map", "values" : ["string","null"]}, "null"] -- recordSchema = ["null", { "type" : "map", "values" : ["string","null"]}] --- JAVA_VERSION_SPECIFIC_OUTPUT DROP TABLE IF EXISTS avro_table; diff --git ql/src/test/queries/clientpositive/avro_nullable_fields.q ql/src/test/queries/clientpositive/avro_nullable_fields.q index 9ba744178b6dc1a7f2e7a8294f8056a76889edc7..cb398d6ef18d87d29c61f7bd0b8022ec290b5b42 100644 --- ql/src/test/queries/clientpositive/avro_nullable_fields.q +++ ql/src/test/queries/clientpositive/avro_nullable_fields.q @@ -1,6 +1,5 @@ -- Verify that nullable fields properly work --- JAVA_VERSION_SPECIFIC_OUTPUT CREATE TABLE test_serializer(string1 STRING, int1 INT, diff --git ql/src/test/queries/clientpositive/avro_timestamp.q ql/src/test/queries/clientpositive/avro_timestamp.q index 7bf0dc8900e917ea5d84e534a66450218273893f..847f2506d54cd270420f90fd5a879f97e9284f98 100644 --- ql/src/test/queries/clientpositive/avro_timestamp.q +++ ql/src/test/queries/clientpositive/avro_timestamp.q @@ -1,7 +1,6 @@ set hive.mapred.mode=nonstrict; -- Exclude test on Windows due to space character being escaped in Hive paths on Windows. 
-- EXCLUDE_OS_WINDOWS --- JAVA_VERSION_SPECIFIC_OUTPUT DROP TABLE avro_timestamp_staging; DROP TABLE avro_timestamp; diff --git ql/src/test/queries/clientpositive/cbo_rp_outer_join_ppr.q ql/src/test/queries/clientpositive/cbo_rp_outer_join_ppr.q index c497ce99624452797b93080aef6733d99bda48ff..d8f726e9624b3d417f4b6c49480cea0433c7d257 100644 --- ql/src/test/queries/clientpositive/cbo_rp_outer_join_ppr.q +++ ql/src/test/queries/clientpositive/cbo_rp_outer_join_ppr.q @@ -4,7 +4,6 @@ set hive.cbo.returnpath.hiveop=true; set hive.optimize.ppd=true; -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT EXPLAIN EXTENDED FROM diff --git ql/src/test/queries/clientpositive/char_udf1.q ql/src/test/queries/clientpositive/char_udf1.q index 09012b450d3441a73d05f3c38fc6b25ec7f22e6d..39aa0e0e17eb5b81dbba8f874269c2dcee2b2f07 100644 --- ql/src/test/queries/clientpositive/char_udf1.q +++ ql/src/test/queries/clientpositive/char_udf1.q @@ -4,7 +4,6 @@ create table char_udf_1 (c1 string, c2 string, c3 char(10), c4 char(20)); insert overwrite table char_udf_1 select key, value, key, value from src where key = '238' limit 1; --- JAVA_VERSION_SPECIFIC_OUTPUT -- UDFs with char support select diff --git ql/src/test/queries/clientpositive/input4.q ql/src/test/queries/clientpositive/input4.q index 83edbe2021803330e5a491160917f4f9df1b6baf..90fcbdd695602b6ab24bba40892ffa13de1ab90a 100644 --- ql/src/test/queries/clientpositive/input4.q +++ ql/src/test/queries/clientpositive/input4.q @@ -1,4 +1,3 @@ --- JAVA_VERSION_SPECIFIC_OUTPUT CREATE TABLE INPUT4(KEY STRING, VALUE STRING) STORED AS TEXTFILE; EXPLAIN diff --git ql/src/test/queries/clientpositive/join0.q ql/src/test/queries/clientpositive/join0.q index 66f2ef355a537e463f4da0128ed6894620f16b04..3252847703a03aad77069e9c29ee3af650fbe0cd 100644 --- ql/src/test/queries/clientpositive/join0.q +++ ql/src/test/queries/clientpositive/join0.q @@ -1,6 +1,5 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; --- JAVA_VERSION_SPECIFIC_OUTPUT -- SORT_QUERY_RESULTS EXPLAIN diff --git ql/src/test/queries/clientpositive/list_bucket_dml_10.q ql/src/test/queries/clientpositive/list_bucket_dml_10.q index 5d3dade61d106023f2f2385643b268da3af0c5cf..f25c174692a891e3d00174ce89b28ac5f083a11f 100644 --- ql/src/test/queries/clientpositive/list_bucket_dml_10.q +++ ql/src/test/queries/clientpositive/list_bucket_dml_10.q @@ -1,7 +1,6 @@ set mapred.input.dir.recursive=true; -- run this test case in minimr to ensure it works in cluster --- JAVA_VERSION_SPECIFIC_OUTPUT -- list bucketing DML: static partition. multiple skewed columns. -- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: diff --git ql/src/test/queries/clientpositive/list_bucket_dml_11.q ql/src/test/queries/clientpositive/list_bucket_dml_11.q index 2d22d6646c11a3bbec455f207babb8edb154ba7d..8ac162768f1f8271611cd7a8304d6197b6fb64d9 100644 --- ql/src/test/queries/clientpositive/list_bucket_dml_11.q +++ ql/src/test/queries/clientpositive/list_bucket_dml_11.q @@ -6,7 +6,6 @@ set hive.merge.mapredfiles=false; -- Ensure it works if skewed column is not the first column in the table columns -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- JAVA_VERSION_SPECIFIC_OUTPUT -- list bucketing DML: static partition. multiple skewed columns. 
diff --git ql/src/test/queries/clientpositive/list_bucket_dml_12.q ql/src/test/queries/clientpositive/list_bucket_dml_12.q index ac063cc194eee468a1b20d9b1b39b1c216ce6f7a..9facfa5982c3f0a819e9fe6106ee75415d85cf30 100644 --- ql/src/test/queries/clientpositive/list_bucket_dml_12.q +++ ql/src/test/queries/clientpositive/list_bucket_dml_12.q @@ -7,7 +7,6 @@ set hive.merge.mapredfiles=false; -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT -- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) diff --git ql/src/test/queries/clientpositive/list_bucket_dml_13.q ql/src/test/queries/clientpositive/list_bucket_dml_13.q index d68ca93845385688e095162c3b0d223f2847e812..0fe7f612d0595e0d05f6b12ca12c900bd58103cc 100644 --- ql/src/test/queries/clientpositive/list_bucket_dml_13.q +++ ql/src/test/queries/clientpositive/list_bucket_dml_13.q @@ -7,7 +7,6 @@ set hive.merge.mapredfiles=false; -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT -- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) diff --git ql/src/test/queries/clientpositive/list_bucket_dml_2.q ql/src/test/queries/clientpositive/list_bucket_dml_2.q index 263a002cd4db123f9c33217c61175681f0006b9f..c6dceab736c3e8703a7ab2f06fd5724c79b9ba3d 100644 --- ql/src/test/queries/clientpositive/list_bucket_dml_2.q +++ ql/src/test/queries/clientpositive/list_bucket_dml_2.q @@ -10,7 +10,6 @@ set hive.stats.reliable=true; -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT -- list bucketing DML: static partition. multiple skewed columns. -- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: diff --git ql/src/test/queries/clientpositive/list_bucket_dml_4.q ql/src/test/queries/clientpositive/list_bucket_dml_4.q index 86ff342e11117e4dc014399780800764f8277e23..950409d8c8374f881bc9ba7df536af0a389900df 100644 --- ql/src/test/queries/clientpositive/list_bucket_dml_4.q +++ ql/src/test/queries/clientpositive/list_bucket_dml_4.q @@ -9,7 +9,6 @@ set hive.merge.mapredfiles=false; -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT -- list bucketing DML: static partition. multiple skewed columns. merge. 
-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: diff --git ql/src/test/queries/clientpositive/list_bucket_dml_5.q ql/src/test/queries/clientpositive/list_bucket_dml_5.q index ace7ba95677db8905687be879262672c594b46ec..fce8e2ea514e915ca2e9b1069f364b3c5e6cfef9 100644 --- ql/src/test/queries/clientpositive/list_bucket_dml_5.q +++ ql/src/test/queries/clientpositive/list_bucket_dml_5.q @@ -10,7 +10,6 @@ set mapred.input.dir.recursive=true; -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT -- create a skewed table create table list_bucketing_dynamic_part (key String, value String) diff --git ql/src/test/queries/clientpositive/list_bucket_dml_6.q ql/src/test/queries/clientpositive/list_bucket_dml_6.q index 56847885eac30bde8df99b03dae319a6b4c439ac..631c938ee386664da6c7dff3394e2232df43e81a 100644 --- ql/src/test/queries/clientpositive/list_bucket_dml_6.q +++ ql/src/test/queries/clientpositive/list_bucket_dml_6.q @@ -47,7 +47,6 @@ set hive.merge.mapredfiles=false; -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT -- create a skewed table create table list_bucketing_dynamic_part (key String, value String) diff --git ql/src/test/queries/clientpositive/list_bucket_dml_8.q ql/src/test/queries/clientpositive/list_bucket_dml_8.q index d90454332218b48f31aaf74299a4e86298c81409..6d73896a5488da7d15462e60b98049377b05e467 100644 --- ql/src/test/queries/clientpositive/list_bucket_dml_8.q +++ ql/src/test/queries/clientpositive/list_bucket_dml_8.q @@ -48,7 +48,6 @@ set hive.merge.mapredfiles=false; -- 118 000002_0 -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- JAVA_VERSION_SPECIFIC_OUTPUT -- create a skewed table create table list_bucketing_dynamic_part (key String, value String) diff --git ql/src/test/queries/clientpositive/list_bucket_dml_9.q ql/src/test/queries/clientpositive/list_bucket_dml_9.q index 620750cef5fe57ebdb59ac180dcfd397056df25a..d2e24af481c46ff0ed6cba7af627b6d9a9b9211e 100644 --- ql/src/test/queries/clientpositive/list_bucket_dml_9.q +++ ql/src/test/queries/clientpositive/list_bucket_dml_9.q @@ -9,7 +9,6 @@ set hive.merge.mapredfiles=false; -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT -- list bucketing DML: static partition. multiple skewed columns. merge. 
-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: diff --git ql/src/test/queries/clientpositive/outer_join_ppr.q ql/src/test/queries/clientpositive/outer_join_ppr.q index 497a4d1dc3c36bd78eca7c43410383885b756a97..60a06ae11ee8e1bd6f138cfbafcfc8d38cc85ac2 100644 --- ql/src/test/queries/clientpositive/outer_join_ppr.q +++ ql/src/test/queries/clientpositive/outer_join_ppr.q @@ -2,7 +2,6 @@ set hive.mapred.mode=nonstrict; set hive.optimize.ppd=true; -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT EXPLAIN EXTENDED FROM diff --git ql/src/test/queries/clientpositive/parquet_map_null.q ql/src/test/queries/clientpositive/parquet_map_null.q index 61058f319d3189eb5c68032548b4d97d65f133cc..e154159934c208a87367707b7925fa159c46c968 100644 --- ql/src/test/queries/clientpositive/parquet_map_null.q +++ ql/src/test/queries/clientpositive/parquet_map_null.q @@ -1,5 +1,4 @@ -- This test attempts to write a parquet table from an avro table that contains map null values --- JAVA_VERSION_SPECIFIC_OUTPUT DROP TABLE IF EXISTS avro_table; DROP TABLE IF EXISTS parquet_table; diff --git ql/src/test/queries/clientpositive/plan_json.q ql/src/test/queries/clientpositive/plan_json.q index aa2b134fe238ef680cdfe80bff3eb26c6627b4bf..503b55d8b9bf6178a74ba9ec4c9343457dcfbf31 100644 --- ql/src/test/queries/clientpositive/plan_json.q +++ ql/src/test/queries/clientpositive/plan_json.q @@ -1,5 +1,4 @@ -- explain plan json: the query gets the formatted json output of the query plan of the hive query --- JAVA_VERSION_SPECIFIC_OUTPUT EXPLAIN FORMATTED SELECT count(1) FROM src; diff --git ql/src/test/queries/clientpositive/stats_list_bucket.q ql/src/test/queries/clientpositive/stats_list_bucket.q index 51137a81595f7b35f9ea5c3a99f09593bf5d534f..536702c1cd3aeaccc539cc0920f31fb9c271edb4 100644 --- ql/src/test/queries/clientpositive/stats_list_bucket.q +++ ql/src/test/queries/clientpositive/stats_list_bucket.q @@ -1,6 +1,5 @@ -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- JAVA_VERSION_SPECIFIC_OUTPUT drop table stats_list_bucket; drop table stats_list_bucket_1; diff --git ql/src/test/queries/clientpositive/str_to_map.q ql/src/test/queries/clientpositive/str_to_map.q index f2993b1780f5e488c009610650c3b45ffff11a6b..3280d899d2815e5062c5f222ead989d62a27b756 100644 --- ql/src/test/queries/clientpositive/str_to_map.q +++ ql/src/test/queries/clientpositive/str_to_map.q @@ -1,7 +1,6 @@ set hive.mapred.mode=nonstrict; set hive.fetch.task.conversion=more; --- JAVA_VERSION_SPECIFIC_OUTPUT desc function str_to_map; desc function extended str_to_map; diff --git ql/src/test/queries/clientpositive/subquery_multiinsert.q ql/src/test/queries/clientpositive/subquery_multiinsert.q index bea2e13f068374bcc8e1dd04b1bb8466dbdf3d98..9d70f51ec268c2f4c699adfa3d499cfa26f280bd 100644 --- ql/src/test/queries/clientpositive/subquery_multiinsert.q +++ ql/src/test/queries/clientpositive/subquery_multiinsert.q @@ -2,7 +2,6 @@ set hive.mapred.mode=nonstrict; set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecutePrinter,org.apache.hadoop.hive.ql.hooks.PrintCompletedTasksHook; -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT CREATE TABLE src_4( key STRING, diff --git ql/src/test/queries/clientpositive/subquery_notin_having.q ql/src/test/queries/clientpositive/subquery_notin_having.q index 8b2914d6eb0d753b32a6328f23f511895640a7ab..05148dfcc575df4e56b227e9fa77d9794346bdc5 100644 --- ql/src/test/queries/clientpositive/subquery_notin_having.q +++ ql/src/test/queries/clientpositive/subquery_notin_having.q @@ -1,6 +1,5 @@ set 
hive.mapred.mode=nonstrict; -- non agg, non corr --- JAVA_VERSION_SPECIFIC_OUTPUT explain select key, count(*) diff --git ql/src/test/queries/clientpositive/varchar_udf1.q ql/src/test/queries/clientpositive/varchar_udf1.q index ff40b310dc0e6687f9db369cd452cb1b1860ead2..4d1f884ea7460ad5116ed4717a23453473a13ae8 100644 --- ql/src/test/queries/clientpositive/varchar_udf1.q +++ ql/src/test/queries/clientpositive/varchar_udf1.q @@ -4,7 +4,6 @@ create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20) insert overwrite table varchar_udf_1 select key, value, key, value from src where key = '238' limit 1; --- JAVA_VERSION_SPECIFIC_OUTPUT -- UDFs with varchar support select diff --git ql/src/test/queries/clientpositive/vector_cast_constant.q ql/src/test/queries/clientpositive/vector_cast_constant.q index c50dd8f4c6ba26ec5b18fcc7fb4536d4a2b39f15..94bee09a4b8e0c13a2e1564b09cbf7333c4d401b 100644 --- ql/src/test/queries/clientpositive/vector_cast_constant.q +++ ql/src/test/queries/clientpositive/vector_cast_constant.q @@ -2,7 +2,6 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; --- JAVA_VERSION_SPECIFIC_OUTPUT DROP TABLE over1k; DROP TABLE over1korc; diff --git ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.java1.7.out ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.java1.7.out deleted file mode 100644 index 4ea70e3c9e7e63b7143d5cbf45251d7727d521f8..0000000000000000000000000000000000000000 --- ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.java1.7.out +++ /dev/null @@ -1,73 +0,0 @@ -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE Employee_Part -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE Employee_Part -POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) -row format delimited fields terminated by '|' stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@Employee_Part -POSTHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) -row format delimited fields terminated by '|' stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@Employee_Part -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: 
default@employee_part@employeesalary=2000.0/country=UK -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK -FAILED: SemanticException [Error 30007]: Invalid partitioning key/value specified in ANALYZE statement : {employeesalary=4000.0, country=Canada} diff --git ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.java1.8.out ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.java1.8.out deleted file mode 100644 index 7cae55e445f3b516bff697fa2f47cac1ba109291..0000000000000000000000000000000000000000 --- ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.java1.8.out +++ /dev/null @@ -1,73 +0,0 @@ -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE Employee_Part -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE Employee_Part -POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) -row format delimited fields terminated by '|' stored as textfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@Employee_Part -POSTHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) -row format 
delimited fields terminated by '|' stored as textfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@Employee_Part -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK') -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@employee_part -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK') -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@employee_part -POSTHOOK: Output: 
default@employee_part@employeesalary=3000.0/country=UK -FAILED: SemanticException [Error 30007]: Invalid partitioning key/value specified in ANALYZE statement : {country=Canada, employeesalary=4000.0} diff --git ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.out ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.out new file mode 100644 index 0000000000000000000000000000000000000000..3261f78fab2341208c61d3cd5728f9a9d710ec80 --- /dev/null +++ ql/src/test/results/clientnegative/columnstats_partlvl_invalid_values.q.out @@ -0,0 +1,69 @@ +PREHOOK: query: DROP TABLE Employee_Part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE Employee_Part +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) +row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@Employee_Part +POSTHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string) +row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@Employee_Part +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: 
Output: default@employee_part@employeesalary=4000.0/country=USA +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@employee_part +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK +FAILED: SemanticException [Error 30007]: Invalid partitioning key/value specified in ANALYZE statement : {employeesalary=4000.0, country=Canada} diff --git ql/src/test/results/clientpositive/authorization_explain.q.java1.7.out ql/src/test/results/clientpositive/authorization_explain.q.java1.7.out deleted file mode 100644 index a9ed0495fcecadbddf1fcfb764e916fbb5406662..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/authorization_explain.q.java1.7.out +++ /dev/null @@ -1,44 +0,0 @@ -Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -explain authorization select * from src join srcpart -PREHOOK: type: QUERY -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -explain authorization select * from src join srcpart -POSTHOOK: type: QUERY -INPUTS: - default@src - default@srcpart - default@srcpart@ds=2008-04-08/hr=11 - default@srcpart@ds=2008-04-08/hr=12 - default@srcpart@ds=2008-04-09/hr=11 - default@srcpart@ds=2008-04-09/hr=12 -OUTPUTS: -#### A masked pattern was here #### -CURRENT_USER: - hive_test_user -OPERATION: - QUERY -Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product -PREHOOK: query: explain formatted authorization select * from src join srcpart -PREHOOK: type: QUERY -POSTHOOK: query: explain formatted authorization select * from src join srcpart -POSTHOOK: type: QUERY -#### A masked pattern was here #### -PREHOOK: query: explain authorization use default -PREHOOK: type: SWITCHDATABASE -POSTHOOK: query: explain authorization use default -POSTHOOK: type: SWITCHDATABASE -INPUTS: - database:default -OUTPUTS: -CURRENT_USER: - hive_test_user -OPERATION: - SWITCHDATABASE -PREHOOK: query: explain formatted authorization use default -PREHOOK: type: SWITCHDATABASE -POSTHOOK: query: explain formatted authorization use default -POSTHOOK: type: SWITCHDATABASE -{"INPUTS":["database:default"],"OUTPUTS":[],"CURRENT_USER":"hive_test_user","OPERATION":"SWITCHDATABASE"} diff --git ql/src/test/results/clientpositive/authorization_explain.q.java1.8.out ql/src/test/results/clientpositive/authorization_explain.q.java1.8.out deleted file mode 100644 index b7ec20981a9322cedcfbdc8da5b1bf0946c98bfe..0000000000000000000000000000000000000000 --- 
ql/src/test/results/clientpositive/authorization_explain.q.java1.8.out +++ /dev/null @@ -1,47 +0,0 @@ -Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -explain authorization select * from src join srcpart -PREHOOK: type: QUERY -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -explain authorization select * from src join srcpart -POSTHOOK: type: QUERY -INPUTS: - default@src - default@srcpart - default@srcpart@ds=2008-04-08/hr=11 - default@srcpart@ds=2008-04-08/hr=12 - default@srcpart@ds=2008-04-09/hr=11 - default@srcpart@ds=2008-04-09/hr=12 -OUTPUTS: -#### A masked pattern was here #### -CURRENT_USER: - hive_test_user -OPERATION: - QUERY -AUTHORIZATION_FAILURES: - No privilege 'Select' found for inputs { database:default, table:src, columnName:key} - No privilege 'Select' found for inputs { database:default, table:srcpart, columnName:key} -Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product -PREHOOK: query: explain formatted authorization select * from src join srcpart -PREHOOK: type: QUERY -POSTHOOK: query: explain formatted authorization select * from src join srcpart -POSTHOOK: type: QUERY -#### A masked pattern was here #### -PREHOOK: query: explain authorization use default -PREHOOK: type: SWITCHDATABASE -POSTHOOK: query: explain authorization use default -POSTHOOK: type: SWITCHDATABASE -INPUTS: - database:default -OUTPUTS: -CURRENT_USER: - hive_test_user -OPERATION: - SWITCHDATABASE -PREHOOK: query: explain formatted authorization use default -PREHOOK: type: SWITCHDATABASE -POSTHOOK: query: explain formatted authorization use default -POSTHOOK: type: SWITCHDATABASE -{"INPUTS":["database:default"],"OUTPUTS":[],"CURRENT_USER":"hive_test_user","OPERATION":"SWITCHDATABASE"} diff --git ql/src/test/results/clientpositive/authorization_explain.q.out ql/src/test/results/clientpositive/authorization_explain.q.out new file mode 100644 index 0000000000000000000000000000000000000000..460b3b1df0fa1c2f3253b00ff6574cc97670372e --- /dev/null +++ ql/src/test/results/clientpositive/authorization_explain.q.out @@ -0,0 +1,40 @@ +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain authorization select * from src join srcpart +PREHOOK: type: QUERY +POSTHOOK: query: explain authorization select * from src join srcpart +POSTHOOK: type: QUERY +INPUTS: + default@src + default@srcpart + default@srcpart@ds=2008-04-08/hr=11 + default@srcpart@ds=2008-04-08/hr=12 + default@srcpart@ds=2008-04-09/hr=11 + default@srcpart@ds=2008-04-09/hr=12 +OUTPUTS: +#### A masked pattern was here #### +CURRENT_USER: + hive_test_user +OPERATION: + QUERY +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain formatted authorization select * from src join srcpart +PREHOOK: type: QUERY +POSTHOOK: query: explain formatted authorization select * from src join srcpart +POSTHOOK: type: QUERY +#### A masked pattern was here #### +PREHOOK: query: explain authorization use default +PREHOOK: type: SWITCHDATABASE +POSTHOOK: query: explain authorization use default +POSTHOOK: type: SWITCHDATABASE +INPUTS: + database:default +OUTPUTS: +CURRENT_USER: + hive_test_user +OPERATION: + SWITCHDATABASE +PREHOOK: query: explain formatted authorization use default +PREHOOK: type: SWITCHDATABASE +POSTHOOK: query: explain formatted authorization use 
default
+POSTHOOK: type: SWITCHDATABASE
+{"INPUTS":["database:default"],"OUTPUTS":[],"CURRENT_USER":"hive_test_user","OPERATION":"SWITCHDATABASE"}
diff --git ql/src/test/results/clientpositive/avro_date.q.java1.7.out ql/src/test/results/clientpositive/avro_date.q.java1.7.out
deleted file mode 100644
index 501b983a05917502e5f06c2cbed61d4240f4aa8f..0000000000000000000000000000000000000000
--- ql/src/test/results/clientpositive/avro_date.q.java1.7.out
+++ /dev/null
@@ -1,130 +0,0 @@
-PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT
-
-DROP TABLE avro_date_staging
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT
-
-DROP TABLE avro_date_staging
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: DROP TABLE avro_date
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: DROP TABLE avro_date
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: DROP TABLE avro_date_casts
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: DROP TABLE avro_date_casts
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: CREATE TABLE avro_date_staging (d date, m1 map<string,date>, l1 array<date>)
-  ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
-  COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':'
-  STORED AS TEXTFILE
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@avro_date_staging
-POSTHOOK: query: CREATE TABLE avro_date_staging (d date, m1 map<string,date>, l1 array<date>)
-  ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
-  COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':'
-  STORED AS TEXTFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@avro_date_staging
-PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/avro_date.txt' OVERWRITE INTO TABLE avro_date_staging
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@avro_date_staging
-POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/avro_date.txt' OVERWRITE INTO TABLE avro_date_staging
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@avro_date_staging
-PREHOOK: query: CREATE TABLE avro_date (d date, m1 map<string,date>, l1 array<date>)
-  PARTITIONED BY (p1 int, p2 date)
-  ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
-  COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':'
-  STORED AS AVRO
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@avro_date
-POSTHOOK: query: CREATE TABLE avro_date (d date, m1 map<string,date>, l1 array<date>)
-  PARTITIONED BY (p1 int, p2 date)
-  ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
-  COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':'
-  STORED AS AVRO
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@avro_date
-PREHOOK: query: INSERT OVERWRITE TABLE avro_date PARTITION(p1=2, p2='2014-09-26') SELECT * FROM avro_date_staging
-PREHOOK: type: QUERY
-PREHOOK: Input: default@avro_date_staging
-PREHOOK: Output: default@avro_date@p1=2/p2=2014-09-26
-POSTHOOK: query: INSERT OVERWRITE TABLE avro_date PARTITION(p1=2, p2='2014-09-26') SELECT * FROM avro_date_staging
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@avro_date_staging
-POSTHOOK: Output: default@avro_date@p1=2/p2=2014-09-26
-POSTHOOK: Lineage: avro_date PARTITION(p1=2,p2=2014-09-26).d SIMPLE [(avro_date_staging)avro_date_staging.FieldSchema(name:d, type:date, comment:null), ]
-POSTHOOK: Lineage: avro_date PARTITION(p1=2,p2=2014-09-26).l1 SIMPLE [(avro_date_staging)avro_date_staging.FieldSchema(name:l1, type:array<date>, comment:null), ]
-POSTHOOK: Lineage: avro_date PARTITION(p1=2,p2=2014-09-26).m1 SIMPLE [(avro_date_staging)avro_date_staging.FieldSchema(name:m1, type:map<string,date>, comment:null), ]
-PREHOOK: query: SELECT * FROM avro_date
-PREHOOK: type: QUERY
-PREHOOK: Input: default@avro_date
-PREHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM avro_date
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@avro_date
-POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-2012-02-21 {"foo":"1980-12-16","bar":"1998-05-07"} ["2011-09-04","2011-09-05"] 2 2014-09-26
-2014-02-11 {"baz":"1981-12-16"} ["2011-09-05"] 2 2014-09-26
-1947-02-11 {"baz":"1921-12-16"} ["2011-09-05"] 2 2014-09-26
-8200-02-11 {"baz":"6981-12-16"} ["1039-09-05"] 2 2014-09-26
-PREHOOK: query: SELECT d, COUNT(d) FROM avro_date GROUP BY d
-PREHOOK: type: QUERY
-PREHOOK: Input: default@avro_date
-PREHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT d, COUNT(d) FROM avro_date GROUP BY d
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@avro_date
-POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-1947-02-11 1
-2012-02-21 1
-2014-02-11 1
-8200-02-11 1
-PREHOOK: query: SELECT * FROM avro_date WHERE d!='1947-02-11'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@avro_date
-PREHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM avro_date WHERE d!='1947-02-11'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@avro_date
-POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-2012-02-21 {"foo":"1980-12-16","bar":"1998-05-07"} ["2011-09-04","2011-09-05"] 2 2014-09-26
-2014-02-11 {"baz":"1981-12-16"} ["2011-09-05"] 2 2014-09-26
-8200-02-11 {"baz":"6981-12-16"} ["1039-09-05"] 2 2014-09-26
-PREHOOK: query: SELECT * FROM avro_date WHERE d<'2014-12-21'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@avro_date
-PREHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM avro_date WHERE d<'2014-12-21'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@avro_date
-POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-2012-02-21 {"foo":"1980-12-16","bar":"1998-05-07"} ["2011-09-04","2011-09-05"] 2 2014-09-26
-2014-02-11 {"baz":"1981-12-16"} ["2011-09-05"] 2 2014-09-26
-1947-02-11 {"baz":"1921-12-16"} ["2011-09-05"] 2 2014-09-26
-PREHOOK: query: SELECT * FROM avro_date WHERE d>'8000-12-01'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@avro_date
-PREHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM avro_date WHERE d>'8000-12-01'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@avro_date
-POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-8200-02-11 {"baz":"6981-12-16"} ["1039-09-05"] 2 2014-09-26
diff --git ql/src/test/results/clientpositive/avro_date.q.java1.8.out ql/src/test/results/clientpositive/avro_date.q.java1.8.out
deleted file mode 100644
index dea51c6b0d1603908e9518250032fa4d95c02f8b..0000000000000000000000000000000000000000
--- ql/src/test/results/clientpositive/avro_date.q.java1.8.out
+++ /dev/null
@@ -1,130 +0,0 @@
-PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT
-
-DROP TABLE avro_date_staging
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT
-
-DROP TABLE avro_date_staging
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: DROP TABLE avro_date
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: DROP TABLE avro_date
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: DROP TABLE avro_date_casts
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: DROP TABLE avro_date_casts
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: CREATE TABLE avro_date_staging (d date, m1 map<string,date>, l1 array<date>)
-  ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
-  COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':'
-  STORED AS TEXTFILE
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@avro_date_staging
-POSTHOOK: query: CREATE TABLE avro_date_staging (d date, m1 map<string,date>, l1 array<date>)
-  ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
-  COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':'
-  STORED AS TEXTFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@avro_date_staging
-PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/avro_date.txt' OVERWRITE INTO TABLE avro_date_staging
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@avro_date_staging
-POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/avro_date.txt' OVERWRITE INTO TABLE avro_date_staging
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@avro_date_staging
-PREHOOK: query: CREATE TABLE avro_date (d date, m1 map<string,date>, l1 array<date>)
-  PARTITIONED BY (p1 int, p2 date)
-  ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
-  COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':'
-  STORED AS AVRO
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@avro_date
-POSTHOOK: query: CREATE TABLE avro_date (d date, m1 map<string,date>, l1 array<date>)
-  PARTITIONED BY (p1 int, p2 date)
-  ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
-  COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':'
-  STORED AS AVRO
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@avro_date
-PREHOOK: query: INSERT OVERWRITE TABLE avro_date PARTITION(p1=2, p2='2014-09-26') SELECT * FROM avro_date_staging
-PREHOOK: type: QUERY
-PREHOOK: Input: default@avro_date_staging
-PREHOOK: Output: default@avro_date@p1=2/p2=2014-09-26
-POSTHOOK: query: INSERT OVERWRITE TABLE avro_date PARTITION(p1=2, p2='2014-09-26') SELECT * FROM avro_date_staging
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@avro_date_staging
-POSTHOOK: Output: default@avro_date@p1=2/p2=2014-09-26
-POSTHOOK: Lineage: avro_date PARTITION(p1=2,p2=2014-09-26).d SIMPLE [(avro_date_staging)avro_date_staging.FieldSchema(name:d, type:date, comment:null), ]
-POSTHOOK: Lineage: avro_date PARTITION(p1=2,p2=2014-09-26).l1 SIMPLE [(avro_date_staging)avro_date_staging.FieldSchema(name:l1, type:array<date>, comment:null), ]
-POSTHOOK: Lineage: avro_date PARTITION(p1=2,p2=2014-09-26).m1 SIMPLE [(avro_date_staging)avro_date_staging.FieldSchema(name:m1, type:map<string,date>, comment:null), ]
-PREHOOK: query: SELECT * FROM avro_date
-PREHOOK: type: QUERY
-PREHOOK: Input: default@avro_date
-PREHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM avro_date
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@avro_date
-POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-2012-02-21 {"bar":"1998-05-07","foo":"1980-12-16"} ["2011-09-04","2011-09-05"] 2 2014-09-26
-2014-02-11 {"baz":"1981-12-16"} ["2011-09-05"] 2 2014-09-26
-1947-02-11 {"baz":"1921-12-16"} ["2011-09-05"] 2 2014-09-26
-8200-02-11 {"baz":"6981-12-16"} ["1039-09-05"] 2 2014-09-26
-PREHOOK: query: SELECT d, COUNT(d) FROM avro_date GROUP BY d
-PREHOOK: type: QUERY
-PREHOOK: Input: default@avro_date
-PREHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT d, COUNT(d) FROM avro_date GROUP BY d
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@avro_date
-POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-1947-02-11 1
-2012-02-21 1
-2014-02-11 1
-8200-02-11 1
-PREHOOK: query: SELECT * FROM avro_date WHERE d!='1947-02-11'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@avro_date
-PREHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM avro_date WHERE d!='1947-02-11'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@avro_date
-POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-2012-02-21 {"bar":"1998-05-07","foo":"1980-12-16"} ["2011-09-04","2011-09-05"] 2 2014-09-26
-2014-02-11 {"baz":"1981-12-16"} ["2011-09-05"] 2 2014-09-26
-8200-02-11 {"baz":"6981-12-16"} ["1039-09-05"] 2 2014-09-26
-PREHOOK: query: SELECT * FROM avro_date WHERE d<'2014-12-21'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@avro_date
-PREHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM avro_date WHERE d<'2014-12-21'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@avro_date
-POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-2012-02-21 {"bar":"1998-05-07","foo":"1980-12-16"} ["2011-09-04","2011-09-05"] 2 2014-09-26
-2014-02-11 {"baz":"1981-12-16"} ["2011-09-05"] 2 2014-09-26
-1947-02-11 {"baz":"1921-12-16"} ["2011-09-05"] 2 2014-09-26
-PREHOOK: query: SELECT * FROM avro_date WHERE d>'8000-12-01'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@avro_date
-PREHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM avro_date WHERE d>'8000-12-01'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@avro_date
-POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
-#### A masked pattern was here ####
-8200-02-11 {"baz":"6981-12-16"} ["1039-09-05"] 2 2014-09-26
diff --git ql/src/test/results/clientpositive/avro_date.q.out ql/src/test/results/clientpositive/avro_date.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..32501cf9f160804e428758d19c15af627e340cb0
--- /dev/null
+++ ql/src/test/results/clientpositive/avro_date.q.out
@@ -0,0 +1,126 @@
+PREHOOK: query: DROP TABLE avro_date_staging
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE avro_date_staging
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE avro_date
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE avro_date
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE avro_date_casts
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE avro_date_casts
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE avro_date_staging (d date, m1 map<string,date>, l1 array<date>)
+  ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+  COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':'
+  STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_date_staging
+POSTHOOK: query: CREATE TABLE avro_date_staging (d date, m1 map<string,date>, l1 array<date>)
+  ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+  COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':'
+  STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_date_staging
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/avro_date.txt' OVERWRITE INTO TABLE avro_date_staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@avro_date_staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/avro_date.txt' OVERWRITE INTO TABLE avro_date_staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@avro_date_staging
+PREHOOK: query: CREATE TABLE avro_date (d date, m1 map<string,date>, l1 array<date>)
+  PARTITIONED BY (p1 int, p2 date)
+  ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+  COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':'
+  STORED AS AVRO
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_date
+POSTHOOK: query: CREATE TABLE avro_date (d date, m1 map<string,date>, l1 array<date>)
+  PARTITIONED BY (p1 int, p2 date)
+  ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+  COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':'
+  STORED AS AVRO
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_date
+PREHOOK: query: INSERT OVERWRITE TABLE avro_date PARTITION(p1=2, p2='2014-09-26') SELECT * FROM avro_date_staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_date_staging
+PREHOOK: Output: default@avro_date@p1=2/p2=2014-09-26
+POSTHOOK: query: INSERT OVERWRITE TABLE avro_date PARTITION(p1=2, p2='2014-09-26') SELECT * FROM avro_date_staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_date_staging
+POSTHOOK: Output: default@avro_date@p1=2/p2=2014-09-26
+POSTHOOK: Lineage: avro_date PARTITION(p1=2,p2=2014-09-26).d SIMPLE [(avro_date_staging)avro_date_staging.FieldSchema(name:d, type:date, comment:null), ]
+POSTHOOK: Lineage: avro_date PARTITION(p1=2,p2=2014-09-26).l1 SIMPLE [(avro_date_staging)avro_date_staging.FieldSchema(name:l1, type:array<date>, comment:null), ]
+POSTHOOK: Lineage: avro_date PARTITION(p1=2,p2=2014-09-26).m1 SIMPLE [(avro_date_staging)avro_date_staging.FieldSchema(name:m1, type:map<string,date>, comment:null), ]
+PREHOOK: query: SELECT * FROM avro_date
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_date
+PREHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_date
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_date
+POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
+#### A masked pattern was here ####
+2012-02-21 {"bar":"1998-05-07","foo":"1980-12-16"} ["2011-09-04","2011-09-05"] 2 2014-09-26
+2014-02-11 {"baz":"1981-12-16"} ["2011-09-05"] 2 2014-09-26
+1947-02-11 {"baz":"1921-12-16"} ["2011-09-05"] 2 2014-09-26
+8200-02-11 {"baz":"6981-12-16"} ["1039-09-05"] 2 2014-09-26
+PREHOOK: query: SELECT d, COUNT(d) FROM avro_date GROUP BY d
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_date
+PREHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT d, COUNT(d) FROM avro_date GROUP BY d
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_date
+POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
+#### A masked pattern was here ####
+1947-02-11 1
+2012-02-21 1
+2014-02-11 1
+8200-02-11 1
+PREHOOK: query: SELECT * FROM avro_date WHERE d!='1947-02-11'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_date
+PREHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_date WHERE d!='1947-02-11'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_date
+POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
+#### A masked pattern was here ####
+2012-02-21 {"bar":"1998-05-07","foo":"1980-12-16"} ["2011-09-04","2011-09-05"] 2 2014-09-26
+2014-02-11 {"baz":"1981-12-16"} ["2011-09-05"] 2 2014-09-26
+8200-02-11 {"baz":"6981-12-16"} ["1039-09-05"] 2 2014-09-26
+PREHOOK: query: SELECT * FROM avro_date WHERE d<'2014-12-21'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_date
+PREHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_date WHERE d<'2014-12-21'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_date
+POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
+#### A masked pattern was here ####
+2012-02-21 {"bar":"1998-05-07","foo":"1980-12-16"} ["2011-09-04","2011-09-05"] 2 2014-09-26
+2014-02-11 {"baz":"1981-12-16"} ["2011-09-05"] 2 2014-09-26
+1947-02-11 {"baz":"1921-12-16"} ["2011-09-05"] 2 2014-09-26
+PREHOOK: query: SELECT * FROM avro_date WHERE d>'8000-12-01'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_date
+PREHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_date WHERE d>'8000-12-01'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_date
+POSTHOOK: Input: default@avro_date@p1=2/p2=2014-09-26
+#### A masked pattern was here ####
+8200-02-11 {"baz":"6981-12-16"} ["1039-09-05"] 2 2014-09-26
diff --git ql/src/test/results/clientpositive/avro_deserialize_map_null.q.java1.7.out ql/src/test/results/clientpositive/avro_deserialize_map_null.q.java1.7.out
deleted file mode 100644
index 8f8065eebab04914b980a23637436a52bd2e0051..0000000000000000000000000000000000000000
--- ql/src/test/results/clientpositive/avro_deserialize_map_null.q.java1.7.out
+++ /dev/null
@@ -1,57 +0,0 @@
-PREHOOK: query: -- These test attempts to deserialize an Avro file that contains map null values, and the file schema
--- vs record schema have the null values in different positions
--- i.e.
--- fileSchema = [{ "type" : "map", "values" : ["string","null"]}, "null"]
--- recordSchema = ["null", { "type" : "map", "values" : ["string","null"]}]
-
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-DROP TABLE IF EXISTS avro_table
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: -- These test attempts to deserialize an Avro file that contains map null values, and the file schema
--- vs record schema have the null values in different positions
--- i.e.
--- fileSchema = [{ "type" : "map", "values" : ["string","null"]}, "null"]
--- recordSchema = ["null", { "type" : "map", "values" : ["string","null"]}]
-
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-DROP TABLE IF EXISTS avro_table
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: CREATE TABLE avro_table (avreau_col_1 map<string,string>) STORED AS AVRO
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@avro_table
-POSTHOOK: query: CREATE TABLE avro_table (avreau_col_1 map<string,string>) STORED AS AVRO
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@avro_table
-PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@avro_table
-POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@avro_table
-PREHOOK: query: SELECT * FROM avro_table
-PREHOOK: type: QUERY
-PREHOOK: Input: default@avro_table
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM avro_table
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@avro_table
-#### A masked pattern was here ####
-{"key4":null,"key3":"val3"}
-{"key4":null,"key3":"val3"}
-{"key2":"val2","key1":null}
-{"key4":null,"key3":"val3"}
-{"key4":null,"key3":"val3"}
-PREHOOK: query: DROP TABLE avro_table
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@avro_table
-PREHOOK: Output: default@avro_table
-POSTHOOK: query: DROP TABLE avro_table
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@avro_table
-POSTHOOK: Output: default@avro_table
diff --git ql/src/test/results/clientpositive/avro_deserialize_map_null.q.java1.8.out ql/src/test/results/clientpositive/avro_deserialize_map_null.q.java1.8.out
deleted file mode 100644
index 127d8b3d37fe6bb14e981f7f8013d2f7e3dd7011..0000000000000000000000000000000000000000
--- ql/src/test/results/clientpositive/avro_deserialize_map_null.q.java1.8.out
+++ /dev/null
@@ -1,57 +0,0 @@
-PREHOOK: query: -- These test attempts to deserialize an Avro file that contains map null values, and the file schema
--- vs record schema have the null values in different positions
--- i.e.
--- fileSchema = [{ "type" : "map", "values" : ["string","null"]}, "null"] --- recordSchema = ["null", { "type" : "map", "values" : ["string","null"]}] - --- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE IF EXISTS avro_table -POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE avro_table (avreau_col_1 map) STORED AS AVRO -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@avro_table -POSTHOOK: query: CREATE TABLE avro_table (avreau_col_1 map) STORED AS AVRO -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@avro_table -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@avro_table -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@avro_table -PREHOOK: query: SELECT * FROM avro_table -PREHOOK: type: QUERY -PREHOOK: Input: default@avro_table -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM avro_table -POSTHOOK: type: QUERY -POSTHOOK: Input: default@avro_table -#### A masked pattern was here #### -{"key3":"val3","key4":null} -{"key3":"val3","key4":null} -{"key1":null,"key2":"val2"} -{"key3":"val3","key4":null} -{"key3":"val3","key4":null} -PREHOOK: query: DROP TABLE avro_table -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@avro_table -PREHOOK: Output: default@avro_table -POSTHOOK: query: DROP TABLE avro_table -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@avro_table -POSTHOOK: Output: default@avro_table diff --git ql/src/test/results/clientpositive/avro_deserialize_map_null.q.out ql/src/test/results/clientpositive/avro_deserialize_map_null.q.out new file mode 100644 index 0000000000000000000000000000000000000000..2d983f13ed0c13e4a111bc08080b1c1a25892bad --- /dev/null +++ ql/src/test/results/clientpositive/avro_deserialize_map_null.q.out @@ -0,0 +1,55 @@ +PREHOOK: query: -- These test attempts to deserialize an Avro file that contains map null values, and the file schema +-- vs record schema have the null values in different positions +-- i.e. +-- fileSchema = [{ "type" : "map", "values" : ["string","null"]}, "null"] +-- recordSchema = ["null", { "type" : "map", "values" : ["string","null"]}] + + +DROP TABLE IF EXISTS avro_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- These test attempts to deserialize an Avro file that contains map null values, and the file schema +-- vs record schema have the null values in different positions +-- i.e. 
+-- fileSchema = [{ "type" : "map", "values" : ["string","null"]}, "null"] +-- recordSchema = ["null", { "type" : "map", "values" : ["string","null"]}] + + +DROP TABLE IF EXISTS avro_table +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE avro_table (avreau_col_1 map) STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@avro_table +POSTHOOK: query: CREATE TABLE avro_table (avreau_col_1 map) STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_table +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@avro_table +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@avro_table +PREHOOK: query: SELECT * FROM avro_table +PREHOOK: type: QUERY +PREHOOK: Input: default@avro_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM avro_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@avro_table +#### A masked pattern was here #### +{"key3":"val3","key4":null} +{"key3":"val3","key4":null} +{"key1":null,"key2":"val2"} +{"key3":"val3","key4":null} +{"key3":"val3","key4":null} +PREHOOK: query: DROP TABLE avro_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@avro_table +PREHOOK: Output: default@avro_table +POSTHOOK: query: DROP TABLE avro_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@avro_table +POSTHOOK: Output: default@avro_table diff --git ql/src/test/results/clientpositive/avro_nullable_fields.q.java1.7.out ql/src/test/results/clientpositive/avro_nullable_fields.q.java1.7.out deleted file mode 100644 index 52b09d46ea18e58653c1a2224322861c0ce14460..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/avro_nullable_fields.q.java1.7.out +++ /dev/null @@ -1,179 +0,0 @@ -PREHOOK: query: -- Verify that nullable fields properly work - --- JAVA_VERSION_SPECIFIC_OUTPUT - -CREATE TABLE test_serializer(string1 STRING, - int1 INT, - tinyint1 TINYINT, - smallint1 SMALLINT, - bigint1 BIGINT, - boolean1 BOOLEAN, - float1 FLOAT, - double1 DOUBLE, - list1 ARRAY, - map1 MAP, - struct1 STRUCT, - enum1 STRING, - nullableint INT, - bytes1 BINARY, - fixed1 BINARY) - ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY ':' MAP KEYS TERMINATED BY '#' LINES TERMINATED BY '\n' - STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@test_serializer -POSTHOOK: query: -- Verify that nullable fields properly work - --- JAVA_VERSION_SPECIFIC_OUTPUT - -CREATE TABLE test_serializer(string1 STRING, - int1 INT, - tinyint1 TINYINT, - smallint1 SMALLINT, - bigint1 BIGINT, - boolean1 BOOLEAN, - float1 FLOAT, - double1 DOUBLE, - list1 ARRAY, - map1 MAP, - struct1 STRUCT, - enum1 STRING, - nullableint INT, - bytes1 BINARY, - fixed1 BINARY) - ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY ':' MAP KEYS TERMINATED BY '#' LINES TERMINATED BY '\n' - STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@test_serializer -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/csv.txt' INTO TABLE test_serializer -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@test_serializer -POSTHOOK: 
query: LOAD DATA LOCAL INPATH '../../data/files/csv.txt' INTO TABLE test_serializer -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@test_serializer -PREHOOK: query: CREATE TABLE as_avro - ROW FORMAT - SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' - STORED AS - INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' - OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' - TBLPROPERTIES ( - 'avro.schema.literal'='{ - "namespace": "com.howdy", - "name": "some_schema", - "type": "record", - "fields": [ - { "name": "string1", "type": ["null", "string"] }, - { "name": "int1", "type": ["null", "int"] }, - { "name": "tinyint1", "type": ["null", "int"] }, - { "name": "smallint1", "type": ["null", "int"] }, - { "name": "bigint1", "type": ["null", "long"] }, - { "name": "boolean1", "type": ["null", "boolean"] }, - { "name": "float1", "type": ["null", "float"] }, - { "name": "double1", "type": ["null", "double"] }, - { "name": "list1", "type": ["null", {"type": "array", "items": "string"}] }, - { "name": "map1", "type": ["null", {"type": "map", "values": "int"}] }, - { "name": "struct1", "type": ["null", {"type": "record", "name": "struct1_name", "fields": [ - { "name": "sInt", "type": "int" }, - { "name": "sBoolean", "type": "boolean" }, - { "name": "sString", "type": "string" } - ]}] }, - { "name": "enum1", "type": ["null", {"type": "enum", "name": "enum1_values", "symbols": ["BLUE", "RED", "GREEN"]}] }, - { "name": "nullableint", "type": ["null", "int"] }, - { "name": "bytes1", "type": ["null", "bytes"] }, - { "name": "fixed1", "type": ["null", {"type": "fixed", "name": "threebytes", "size": 3}] } - ] - }' - ) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@as_avro -POSTHOOK: query: CREATE TABLE as_avro - ROW FORMAT - SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' - STORED AS - INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' - OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' - TBLPROPERTIES ( - 'avro.schema.literal'='{ - "namespace": "com.howdy", - "name": "some_schema", - "type": "record", - "fields": [ - { "name": "string1", "type": ["null", "string"] }, - { "name": "int1", "type": ["null", "int"] }, - { "name": "tinyint1", "type": ["null", "int"] }, - { "name": "smallint1", "type": ["null", "int"] }, - { "name": "bigint1", "type": ["null", "long"] }, - { "name": "boolean1", "type": ["null", "boolean"] }, - { "name": "float1", "type": ["null", "float"] }, - { "name": "double1", "type": ["null", "double"] }, - { "name": "list1", "type": ["null", {"type": "array", "items": "string"}] }, - { "name": "map1", "type": ["null", {"type": "map", "values": "int"}] }, - { "name": "struct1", "type": ["null", {"type": "record", "name": "struct1_name", "fields": [ - { "name": "sInt", "type": "int" }, - { "name": "sBoolean", "type": "boolean" }, - { "name": "sString", "type": "string" } - ]}] }, - { "name": "enum1", "type": ["null", {"type": "enum", "name": "enum1_values", "symbols": ["BLUE", "RED", "GREEN"]}] }, - { "name": "nullableint", "type": ["null", "int"] }, - { "name": "bytes1", "type": ["null", "bytes"] }, - { "name": "fixed1", "type": ["null", {"type": "fixed", "name": "threebytes", "size": 3}] } - ] - }' - ) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@as_avro -PREHOOK: query: INSERT OVERWRITE TABLE as_avro SELECT * FROM test_serializer -PREHOOK: type: QUERY -PREHOOK: Input: 
default@test_serializer -PREHOOK: Output: default@as_avro -POSTHOOK: query: INSERT OVERWRITE TABLE as_avro SELECT * FROM test_serializer -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_serializer -POSTHOOK: Output: default@as_avro -POSTHOOK: Lineage: as_avro.bigint1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bigint1, type:bigint, comment:null), ] -POSTHOOK: Lineage: as_avro.boolean1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:boolean1, type:boolean, comment:null), ] -POSTHOOK: Lineage: as_avro.bytes1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bytes1, type:binary, comment:null), ] -POSTHOOK: Lineage: as_avro.double1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:double1, type:double, comment:null), ] -POSTHOOK: Lineage: as_avro.enum1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:enum1, type:string, comment:null), ] -POSTHOOK: Lineage: as_avro.fixed1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:fixed1, type:binary, comment:null), ] -POSTHOOK: Lineage: as_avro.float1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:float1, type:float, comment:null), ] -POSTHOOK: Lineage: as_avro.int1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:int1, type:int, comment:null), ] -POSTHOOK: Lineage: as_avro.list1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:list1, type:array<string>, comment:null), ] -POSTHOOK: Lineage: as_avro.map1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:map1, type:map<string,int>, comment:null), ] -POSTHOOK: Lineage: as_avro.nullableint SIMPLE [(test_serializer)test_serializer.FieldSchema(name:nullableint, type:int, comment:null), ] -POSTHOOK: Lineage: as_avro.smallint1 EXPRESSION [(test_serializer)test_serializer.FieldSchema(name:smallint1, type:smallint, comment:null), ] -POSTHOOK: Lineage: as_avro.string1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:string1, type:string, comment:null), ] -POSTHOOK: Lineage: as_avro.struct1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:struct1, type:struct<sint:int,sboolean:boolean,sstring:string>, comment:null), ] -POSTHOOK: Lineage: as_avro.tinyint1 EXPRESSION [(test_serializer)test_serializer.FieldSchema(name:tinyint1, type:tinyint, comment:null), ] -PREHOOK: query: SELECT * FROM as_avro -PREHOOK: type: QUERY -PREHOOK: Input: default@as_avro -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM as_avro -POSTHOOK: type: QUERY -POSTHOOK: Input: default@as_avro -#### A masked pattern was here #### -why hello there 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -another record 98 4 101 9999999 false 99.89 9.0E-8 ["beta"] {"Earth":101} {"sint":1134,"sboolean":false,"sstring":"wazzup"} RED NULL  ef -third record 45 5 102 999999999 true 89.99 9.0E-14 ["alpha","gamma"] {"Earth":237,"Bob":723} {"sint":102,"sboolean":false,"sstring":"BNL"} GREEN NULL  hi -NULL 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -string NULL 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -string 42 NULL 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -string 42 3 NULL 1412341 true 42.43 85.23423424
["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -string 42 3 100 NULL true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -string 42 3 100 1412341 NULL 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -string 42 3 100 1412341 true NULL 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -string 42 3 100 1412341 true 42.43 NULL ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -string 42 3 100 1412341 true 42.43 85.23423424 NULL {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] NULL {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} NULL BLUE 72  bc -string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} NULL 72  bc -string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE NULL  bc -string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 NULL bc -string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  NULL diff --git ql/src/test/results/clientpositive/avro_nullable_fields.q.java1.8.out ql/src/test/results/clientpositive/avro_nullable_fields.q.java1.8.out deleted file mode 100644 index 3690f7b2817afa1c36f4206fedb8751621c787af..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/avro_nullable_fields.q.java1.8.out +++ /dev/null @@ -1,179 +0,0 @@ -PREHOOK: query: -- Verify that nullable fields properly work - --- JAVA_VERSION_SPECIFIC_OUTPUT - -CREATE TABLE test_serializer(string1 STRING, - int1 INT, - tinyint1 TINYINT, - smallint1 SMALLINT, - bigint1 BIGINT, - boolean1 BOOLEAN, - float1 FLOAT, - double1 DOUBLE, - list1 ARRAY, - map1 MAP, - struct1 STRUCT, - enum1 STRING, - nullableint INT, - bytes1 BINARY, - fixed1 BINARY) - ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY ':' MAP KEYS TERMINATED BY '#' LINES TERMINATED BY '\n' - STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@test_serializer -POSTHOOK: query: -- Verify that nullable fields properly work - --- JAVA_VERSION_SPECIFIC_OUTPUT - -CREATE TABLE test_serializer(string1 STRING, - int1 INT, - tinyint1 TINYINT, - smallint1 SMALLINT, - bigint1 BIGINT, - boolean1 BOOLEAN, - float1 FLOAT, - double1 DOUBLE, - list1 ARRAY, - map1 MAP, - struct1 STRUCT, - enum1 STRING, - nullableint INT, - bytes1 BINARY, - fixed1 BINARY) - ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY ':' MAP KEYS TERMINATED BY '#' LINES TERMINATED BY '\n' - STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default 
-POSTHOOK: Output: default@test_serializer -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/csv.txt' INTO TABLE test_serializer -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@test_serializer -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/csv.txt' INTO TABLE test_serializer -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@test_serializer -PREHOOK: query: CREATE TABLE as_avro - ROW FORMAT - SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' - STORED AS - INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' - OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' - TBLPROPERTIES ( - 'avro.schema.literal'='{ - "namespace": "com.howdy", - "name": "some_schema", - "type": "record", - "fields": [ - { "name": "string1", "type": ["null", "string"] }, - { "name": "int1", "type": ["null", "int"] }, - { "name": "tinyint1", "type": ["null", "int"] }, - { "name": "smallint1", "type": ["null", "int"] }, - { "name": "bigint1", "type": ["null", "long"] }, - { "name": "boolean1", "type": ["null", "boolean"] }, - { "name": "float1", "type": ["null", "float"] }, - { "name": "double1", "type": ["null", "double"] }, - { "name": "list1", "type": ["null", {"type": "array", "items": "string"}] }, - { "name": "map1", "type": ["null", {"type": "map", "values": "int"}] }, - { "name": "struct1", "type": ["null", {"type": "record", "name": "struct1_name", "fields": [ - { "name": "sInt", "type": "int" }, - { "name": "sBoolean", "type": "boolean" }, - { "name": "sString", "type": "string" } - ]}] }, - { "name": "enum1", "type": ["null", {"type": "enum", "name": "enum1_values", "symbols": ["BLUE", "RED", "GREEN"]}] }, - { "name": "nullableint", "type": ["null", "int"] }, - { "name": "bytes1", "type": ["null", "bytes"] }, - { "name": "fixed1", "type": ["null", {"type": "fixed", "name": "threebytes", "size": 3}] } - ] - }' - ) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@as_avro -POSTHOOK: query: CREATE TABLE as_avro - ROW FORMAT - SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' - STORED AS - INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' - OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' - TBLPROPERTIES ( - 'avro.schema.literal'='{ - "namespace": "com.howdy", - "name": "some_schema", - "type": "record", - "fields": [ - { "name": "string1", "type": ["null", "string"] }, - { "name": "int1", "type": ["null", "int"] }, - { "name": "tinyint1", "type": ["null", "int"] }, - { "name": "smallint1", "type": ["null", "int"] }, - { "name": "bigint1", "type": ["null", "long"] }, - { "name": "boolean1", "type": ["null", "boolean"] }, - { "name": "float1", "type": ["null", "float"] }, - { "name": "double1", "type": ["null", "double"] }, - { "name": "list1", "type": ["null", {"type": "array", "items": "string"}] }, - { "name": "map1", "type": ["null", {"type": "map", "values": "int"}] }, - { "name": "struct1", "type": ["null", {"type": "record", "name": "struct1_name", "fields": [ - { "name": "sInt", "type": "int" }, - { "name": "sBoolean", "type": "boolean" }, - { "name": "sString", "type": "string" } - ]}] }, - { "name": "enum1", "type": ["null", {"type": "enum", "name": "enum1_values", "symbols": ["BLUE", "RED", "GREEN"]}] }, - { "name": "nullableint", "type": ["null", "int"] }, - { "name": "bytes1", "type": ["null", "bytes"] }, - { "name": "fixed1", "type": ["null", {"type": "fixed", "name": 
"threebytes", "size": 3}] } - ] - }' - ) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@as_avro -PREHOOK: query: INSERT OVERWRITE TABLE as_avro SELECT * FROM test_serializer -PREHOOK: type: QUERY -PREHOOK: Input: default@test_serializer -PREHOOK: Output: default@as_avro -POSTHOOK: query: INSERT OVERWRITE TABLE as_avro SELECT * FROM test_serializer -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_serializer -POSTHOOK: Output: default@as_avro -POSTHOOK: Lineage: as_avro.bigint1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bigint1, type:bigint, comment:null), ] -POSTHOOK: Lineage: as_avro.boolean1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:boolean1, type:boolean, comment:null), ] -POSTHOOK: Lineage: as_avro.bytes1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bytes1, type:binary, comment:null), ] -POSTHOOK: Lineage: as_avro.double1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:double1, type:double, comment:null), ] -POSTHOOK: Lineage: as_avro.enum1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:enum1, type:string, comment:null), ] -POSTHOOK: Lineage: as_avro.fixed1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:fixed1, type:binary, comment:null), ] -POSTHOOK: Lineage: as_avro.float1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:float1, type:float, comment:null), ] -POSTHOOK: Lineage: as_avro.int1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:int1, type:int, comment:null), ] -POSTHOOK: Lineage: as_avro.list1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:list1, type:array, comment:null), ] -POSTHOOK: Lineage: as_avro.map1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:map1, type:map, comment:null), ] -POSTHOOK: Lineage: as_avro.nullableint SIMPLE [(test_serializer)test_serializer.FieldSchema(name:nullableint, type:int, comment:null), ] -POSTHOOK: Lineage: as_avro.smallint1 EXPRESSION [(test_serializer)test_serializer.FieldSchema(name:smallint1, type:smallint, comment:null), ] -POSTHOOK: Lineage: as_avro.string1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:string1, type:string, comment:null), ] -POSTHOOK: Lineage: as_avro.struct1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:struct1, type:struct, comment:null), ] -POSTHOOK: Lineage: as_avro.tinyint1 EXPRESSION [(test_serializer)test_serializer.FieldSchema(name:tinyint1, type:tinyint, comment:null), ] -PREHOOK: query: SELECT * FROM as_avro -PREHOOK: type: QUERY -PREHOOK: Input: default@as_avro -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM as_avro -POSTHOOK: type: QUERY -POSTHOOK: Input: default@as_avro -#### A masked pattern was here #### -why hello there 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -another record 98 4 101 9999999 false 99.89 9.0E-8 ["beta"] {"Earth":101} {"sint":1134,"sboolean":false,"sstring":"wazzup"} RED NULL  ef -third record 45 5 102 999999999 true 89.99 9.0E-14 ["alpha","gamma"] {"Earth":237,"Bob":723} {"sint":102,"sboolean":false,"sstring":"BNL"} GREEN NULL  hi -NULL 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -string NULL 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} 
BLUE 72  bc -string 42 NULL 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -string 42 3 NULL 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -string 42 3 100 NULL true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -string 42 3 100 1412341 NULL 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -string 42 3 100 1412341 true NULL 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -string 42 3 100 1412341 true 42.43 NULL ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -string 42 3 100 1412341 true 42.43 85.23423424 NULL {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] NULL {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc -string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} NULL BLUE 72  bc -string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} NULL 72  bc -string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE NULL  bc -string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 NULL bc -string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  NULL diff --git ql/src/test/results/clientpositive/avro_nullable_fields.q.out ql/src/test/results/clientpositive/avro_nullable_fields.q.out new file mode 100644 index 0000000000000000000000000000000000000000..2272b34b75fd5ea91cb1a5edf6b6227e45825818 --- /dev/null +++ ql/src/test/results/clientpositive/avro_nullable_fields.q.out @@ -0,0 +1,177 @@ +PREHOOK: query: -- Verify that nullable fields properly work + + +CREATE TABLE test_serializer(string1 STRING, + int1 INT, + tinyint1 TINYINT, + smallint1 SMALLINT, + bigint1 BIGINT, + boolean1 BOOLEAN, + float1 FLOAT, + double1 DOUBLE, + list1 ARRAY<STRING>, + map1 MAP<STRING,INT>, + struct1 STRUCT<sint:INT,sboolean:BOOLEAN,sstring:STRING>, + enum1 STRING, + nullableint INT, + bytes1 BINARY, + fixed1 BINARY) + ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY ':' MAP KEYS TERMINATED BY '#' LINES TERMINATED BY '\n' + STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_serializer +POSTHOOK: query: -- Verify that nullable fields properly work + + +CREATE TABLE test_serializer(string1 STRING, + int1 INT, + tinyint1 TINYINT, + smallint1 SMALLINT, + bigint1 BIGINT, + boolean1 BOOLEAN, + float1 FLOAT, + double1 DOUBLE, + list1 ARRAY<STRING>, + map1 MAP<STRING,INT>, + struct1 STRUCT<sint:INT,sboolean:BOOLEAN,sstring:STRING>, + enum1 STRING, + nullableint INT, + bytes1 BINARY, + fixed1 BINARY) + ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY
':' MAP KEYS TERMINATED BY '#' LINES TERMINATED BY '\n' + STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_serializer +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/csv.txt' INTO TABLE test_serializer +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@test_serializer +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/csv.txt' INTO TABLE test_serializer +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@test_serializer +PREHOOK: query: CREATE TABLE as_avro + ROW FORMAT + SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' + STORED AS + INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' + OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' + TBLPROPERTIES ( + 'avro.schema.literal'='{ + "namespace": "com.howdy", + "name": "some_schema", + "type": "record", + "fields": [ + { "name": "string1", "type": ["null", "string"] }, + { "name": "int1", "type": ["null", "int"] }, + { "name": "tinyint1", "type": ["null", "int"] }, + { "name": "smallint1", "type": ["null", "int"] }, + { "name": "bigint1", "type": ["null", "long"] }, + { "name": "boolean1", "type": ["null", "boolean"] }, + { "name": "float1", "type": ["null", "float"] }, + { "name": "double1", "type": ["null", "double"] }, + { "name": "list1", "type": ["null", {"type": "array", "items": "string"}] }, + { "name": "map1", "type": ["null", {"type": "map", "values": "int"}] }, + { "name": "struct1", "type": ["null", {"type": "record", "name": "struct1_name", "fields": [ + { "name": "sInt", "type": "int" }, + { "name": "sBoolean", "type": "boolean" }, + { "name": "sString", "type": "string" } + ]}] }, + { "name": "enum1", "type": ["null", {"type": "enum", "name": "enum1_values", "symbols": ["BLUE", "RED", "GREEN"]}] }, + { "name": "nullableint", "type": ["null", "int"] }, + { "name": "bytes1", "type": ["null", "bytes"] }, + { "name": "fixed1", "type": ["null", {"type": "fixed", "name": "threebytes", "size": 3}] } + ] + }' + ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@as_avro +POSTHOOK: query: CREATE TABLE as_avro + ROW FORMAT + SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' + STORED AS + INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' + OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' + TBLPROPERTIES ( + 'avro.schema.literal'='{ + "namespace": "com.howdy", + "name": "some_schema", + "type": "record", + "fields": [ + { "name": "string1", "type": ["null", "string"] }, + { "name": "int1", "type": ["null", "int"] }, + { "name": "tinyint1", "type": ["null", "int"] }, + { "name": "smallint1", "type": ["null", "int"] }, + { "name": "bigint1", "type": ["null", "long"] }, + { "name": "boolean1", "type": ["null", "boolean"] }, + { "name": "float1", "type": ["null", "float"] }, + { "name": "double1", "type": ["null", "double"] }, + { "name": "list1", "type": ["null", {"type": "array", "items": "string"}] }, + { "name": "map1", "type": ["null", {"type": "map", "values": "int"}] }, + { "name": "struct1", "type": ["null", {"type": "record", "name": "struct1_name", "fields": [ + { "name": "sInt", "type": "int" }, + { "name": "sBoolean", "type": "boolean" }, + { "name": "sString", "type": "string" } + ]}] }, + { "name": "enum1", "type": ["null", {"type": "enum", "name": "enum1_values", "symbols": ["BLUE", "RED", "GREEN"]}] }, + { "name": "nullableint", "type": 
["null", "int"] }, + { "name": "bytes1", "type": ["null", "bytes"] }, + { "name": "fixed1", "type": ["null", {"type": "fixed", "name": "threebytes", "size": 3}] } + ] + }' + ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@as_avro +PREHOOK: query: INSERT OVERWRITE TABLE as_avro SELECT * FROM test_serializer +PREHOOK: type: QUERY +PREHOOK: Input: default@test_serializer +PREHOOK: Output: default@as_avro +POSTHOOK: query: INSERT OVERWRITE TABLE as_avro SELECT * FROM test_serializer +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_serializer +POSTHOOK: Output: default@as_avro +POSTHOOK: Lineage: as_avro.bigint1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bigint1, type:bigint, comment:null), ] +POSTHOOK: Lineage: as_avro.boolean1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:boolean1, type:boolean, comment:null), ] +POSTHOOK: Lineage: as_avro.bytes1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bytes1, type:binary, comment:null), ] +POSTHOOK: Lineage: as_avro.double1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:double1, type:double, comment:null), ] +POSTHOOK: Lineage: as_avro.enum1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:enum1, type:string, comment:null), ] +POSTHOOK: Lineage: as_avro.fixed1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:fixed1, type:binary, comment:null), ] +POSTHOOK: Lineage: as_avro.float1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:float1, type:float, comment:null), ] +POSTHOOK: Lineage: as_avro.int1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:int1, type:int, comment:null), ] +POSTHOOK: Lineage: as_avro.list1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:list1, type:array, comment:null), ] +POSTHOOK: Lineage: as_avro.map1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:map1, type:map, comment:null), ] +POSTHOOK: Lineage: as_avro.nullableint SIMPLE [(test_serializer)test_serializer.FieldSchema(name:nullableint, type:int, comment:null), ] +POSTHOOK: Lineage: as_avro.smallint1 EXPRESSION [(test_serializer)test_serializer.FieldSchema(name:smallint1, type:smallint, comment:null), ] +POSTHOOK: Lineage: as_avro.string1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:string1, type:string, comment:null), ] +POSTHOOK: Lineage: as_avro.struct1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:struct1, type:struct, comment:null), ] +POSTHOOK: Lineage: as_avro.tinyint1 EXPRESSION [(test_serializer)test_serializer.FieldSchema(name:tinyint1, type:tinyint, comment:null), ] +PREHOOK: query: SELECT * FROM as_avro +PREHOOK: type: QUERY +PREHOOK: Input: default@as_avro +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM as_avro +POSTHOOK: type: QUERY +POSTHOOK: Input: default@as_avro +#### A masked pattern was here #### +why hello there 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc +another record 98 4 101 9999999 false 99.89 9.0E-8 ["beta"] {"Earth":101} {"sint":1134,"sboolean":false,"sstring":"wazzup"} RED NULL  ef +third record 45 5 102 999999999 true 89.99 9.0E-14 ["alpha","gamma"] {"Earth":237,"Bob":723} {"sint":102,"sboolean":false,"sstring":"BNL"} GREEN NULL  hi +NULL 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc +string NULL 3 100 1412341 
true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc +string 42 NULL 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc +string 42 3 NULL 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc +string 42 3 100 NULL true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc +string 42 3 100 1412341 NULL 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc +string 42 3 100 1412341 true NULL 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc +string 42 3 100 1412341 true 42.43 NULL ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc +string 42 3 100 1412341 true 42.43 85.23423424 NULL {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc +string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] NULL {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc +string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} NULL BLUE 72  bc +string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} NULL 72  bc +string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE NULL  bc +string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 NULL bc +string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Control":86,"Bob":31} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  NULL diff --git ql/src/test/results/clientpositive/avro_timestamp.q.java1.7.out ql/src/test/results/clientpositive/avro_timestamp.q.java1.7.out deleted file mode 100644 index d2d3b7c6015cbd1eccbdddc164629750b6a9867e..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/avro_timestamp.q.java1.7.out +++ /dev/null @@ -1,134 +0,0 @@ -PREHOOK: query: -- Exclude test on Windows due to space character being escaped in Hive paths on Windows. --- EXCLUDE_OS_WINDOWS --- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE avro_timestamp_staging -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- Exclude test on Windows due to space character being escaped in Hive paths on Windows. 
--- EXCLUDE_OS_WINDOWS --- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE avro_timestamp_staging -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE avro_timestamp -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE avro_timestamp -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE avro_timestamp_casts -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE avro_timestamp_casts -POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE avro_timestamp_staging (d timestamp, m1 map<string,timestamp>, l1 array<timestamp>) - ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' - COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' - STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@avro_timestamp_staging -POSTHOOK: query: CREATE TABLE avro_timestamp_staging (d timestamp, m1 map<string,timestamp>, l1 array<timestamp>) - ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' - COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' - STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@avro_timestamp_staging -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/avro_timestamp.txt' OVERWRITE INTO TABLE avro_timestamp_staging -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@avro_timestamp_staging -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/avro_timestamp.txt' OVERWRITE INTO TABLE avro_timestamp_staging -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@avro_timestamp_staging -PREHOOK: query: CREATE TABLE avro_timestamp (d timestamp, m1 map<string,timestamp>, l1 array<timestamp>) - PARTITIONED BY (p1 int, p2 timestamp) - ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' - COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' - STORED AS AVRO -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@avro_timestamp -POSTHOOK: query: CREATE TABLE avro_timestamp (d timestamp, m1 map<string,timestamp>, l1 array<timestamp>) - PARTITIONED BY (p1 int, p2 timestamp) - ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' - COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' - STORED AS AVRO -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@avro_timestamp -PREHOOK: query: INSERT OVERWRITE TABLE avro_timestamp PARTITION(p1=2, p2='2014-09-26 07:08:09.123') SELECT * FROM avro_timestamp_staging -PREHOOK: type: QUERY -PREHOOK: Input: default@avro_timestamp_staging -PREHOOK: Output: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -POSTHOOK: query: INSERT OVERWRITE TABLE avro_timestamp PARTITION(p1=2, p2='2014-09-26 07:08:09.123') SELECT * FROM avro_timestamp_staging -POSTHOOK: type: QUERY -POSTHOOK: Input: default@avro_timestamp_staging -POSTHOOK: Output: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -POSTHOOK: Lineage: avro_timestamp PARTITION(p1=2,p2=2014-09-26 07:08:09.123).d SIMPLE [(avro_timestamp_staging)avro_timestamp_staging.FieldSchema(name:d, type:timestamp, comment:null), ] -POSTHOOK: Lineage: avro_timestamp PARTITION(p1=2,p2=2014-09-26 07:08:09.123).l1 SIMPLE [(avro_timestamp_staging)avro_timestamp_staging.FieldSchema(name:l1, type:array<timestamp>, comment:null), ] -POSTHOOK: Lineage: avro_timestamp PARTITION(p1=2,p2=2014-09-26 07:08:09.123).m1 SIMPLE [(avro_timestamp_staging)avro_timestamp_staging.FieldSchema(name:m1, type:map<string,timestamp>, comment:null), ] -PREHOOK: query: SELECT * FROM avro_timestamp -PREHOOK: type: QUERY -PREHOOK: Input: default@avro_timestamp -PREHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26
07%3A08%3A09.123 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM avro_timestamp -POSTHOOK: type: QUERY -POSTHOOK: Input: default@avro_timestamp -POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -#### A masked pattern was here #### -2012-02-21 07:08:09.123 {"foo":"1980-12-16 07:08:09.123","bar":"1998-05-07 07:08:09.123"} ["2011-09-04 07:08:09.123","2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -2014-02-11 07:08:09.123 {"baz":"1981-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -1947-02-11 07:08:09.123 {"baz":"1921-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -8200-02-11 07:08:09.123 {"baz":"6981-12-16 07:08:09.123"} ["1039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -PREHOOK: query: SELECT d, COUNT(d) FROM avro_timestamp GROUP BY d -PREHOOK: type: QUERY -PREHOOK: Input: default@avro_timestamp -PREHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -#### A masked pattern was here #### -POSTHOOK: query: SELECT d, COUNT(d) FROM avro_timestamp GROUP BY d -POSTHOOK: type: QUERY -POSTHOOK: Input: default@avro_timestamp -POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -#### A masked pattern was here #### -1947-02-11 07:08:09.123 1 -2012-02-21 07:08:09.123 1 -2014-02-11 07:08:09.123 1 -8200-02-11 07:08:09.123 1 -PREHOOK: query: SELECT * FROM avro_timestamp WHERE d!='1947-02-11 07:08:09.123' -PREHOOK: type: QUERY -PREHOOK: Input: default@avro_timestamp -PREHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM avro_timestamp WHERE d!='1947-02-11 07:08:09.123' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@avro_timestamp -POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -#### A masked pattern was here #### -2012-02-21 07:08:09.123 {"foo":"1980-12-16 07:08:09.123","bar":"1998-05-07 07:08:09.123"} ["2011-09-04 07:08:09.123","2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -2014-02-11 07:08:09.123 {"baz":"1981-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -8200-02-11 07:08:09.123 {"baz":"6981-12-16 07:08:09.123"} ["1039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -PREHOOK: query: SELECT * FROM avro_timestamp WHERE d<'2014-12-21 07:08:09.123' -PREHOOK: type: QUERY -PREHOOK: Input: default@avro_timestamp -PREHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM avro_timestamp WHERE d<'2014-12-21 07:08:09.123' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@avro_timestamp -POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -#### A masked pattern was here #### -2012-02-21 07:08:09.123 {"foo":"1980-12-16 07:08:09.123","bar":"1998-05-07 07:08:09.123"} ["2011-09-04 07:08:09.123","2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -2014-02-11 07:08:09.123 {"baz":"1981-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -1947-02-11 07:08:09.123 {"baz":"1921-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -PREHOOK: query: SELECT * FROM avro_timestamp WHERE d>'8000-12-01 07:08:09.123' -PREHOOK: type: QUERY -PREHOOK: Input: default@avro_timestamp -PREHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM avro_timestamp WHERE d>'8000-12-01 07:08:09.123' 
-POSTHOOK: type: QUERY -POSTHOOK: Input: default@avro_timestamp -POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -#### A masked pattern was here #### -8200-02-11 07:08:09.123 {"baz":"6981-12-16 07:08:09.123"} ["1039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 diff --git ql/src/test/results/clientpositive/avro_timestamp.q.java1.8.out ql/src/test/results/clientpositive/avro_timestamp.q.java1.8.out deleted file mode 100644 index 4c3834702bf3fb70422b3beefbf4e8053d47b983..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/avro_timestamp.q.java1.8.out +++ /dev/null @@ -1,134 +0,0 @@ -PREHOOK: query: -- Exclude test on Windows due to space character being escaped in Hive paths on Windows. --- EXCLUDE_OS_WINDOWS --- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE avro_timestamp_staging -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- Exclude test on Windows due to space character being escaped in Hive paths on Windows. --- EXCLUDE_OS_WINDOWS --- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE avro_timestamp_staging -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE avro_timestamp -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE avro_timestamp -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE avro_timestamp_casts -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE avro_timestamp_casts -POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE avro_timestamp_staging (d timestamp, m1 map<string,timestamp>, l1 array<timestamp>) - ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' - COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' - STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@avro_timestamp_staging -POSTHOOK: query: CREATE TABLE avro_timestamp_staging (d timestamp, m1 map<string,timestamp>, l1 array<timestamp>) - ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' - COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' - STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@avro_timestamp_staging -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/avro_timestamp.txt' OVERWRITE INTO TABLE avro_timestamp_staging -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@avro_timestamp_staging -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/avro_timestamp.txt' OVERWRITE INTO TABLE avro_timestamp_staging -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@avro_timestamp_staging -PREHOOK: query: CREATE TABLE avro_timestamp (d timestamp, m1 map<string,timestamp>, l1 array<timestamp>) - PARTITIONED BY (p1 int, p2 timestamp) - ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' - COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' - STORED AS AVRO -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@avro_timestamp -POSTHOOK: query: CREATE TABLE avro_timestamp (d timestamp, m1 map<string,timestamp>, l1 array<timestamp>) - PARTITIONED BY (p1 int, p2 timestamp) - ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' - COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' - STORED AS AVRO -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@avro_timestamp -PREHOOK: query: INSERT OVERWRITE TABLE avro_timestamp PARTITION(p1=2, p2='2014-09-26 07:08:09.123') SELECT * FROM avro_timestamp_staging -PREHOOK: type: QUERY -PREHOOK: Input: default@avro_timestamp_staging -PREHOOK: Output: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -POSTHOOK: query: INSERT OVERWRITE TABLE
avro_timestamp PARTITION(p1=2, p2='2014-09-26 07:08:09.123') SELECT * FROM avro_timestamp_staging -POSTHOOK: type: QUERY -POSTHOOK: Input: default@avro_timestamp_staging -POSTHOOK: Output: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -POSTHOOK: Lineage: avro_timestamp PARTITION(p1=2,p2=2014-09-26 07:08:09.123).d SIMPLE [(avro_timestamp_staging)avro_timestamp_staging.FieldSchema(name:d, type:timestamp, comment:null), ] -POSTHOOK: Lineage: avro_timestamp PARTITION(p1=2,p2=2014-09-26 07:08:09.123).l1 SIMPLE [(avro_timestamp_staging)avro_timestamp_staging.FieldSchema(name:l1, type:array<timestamp>, comment:null), ] -POSTHOOK: Lineage: avro_timestamp PARTITION(p1=2,p2=2014-09-26 07:08:09.123).m1 SIMPLE [(avro_timestamp_staging)avro_timestamp_staging.FieldSchema(name:m1, type:map<string,timestamp>, comment:null), ] -PREHOOK: query: SELECT * FROM avro_timestamp -PREHOOK: type: QUERY -PREHOOK: Input: default@avro_timestamp -PREHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM avro_timestamp -POSTHOOK: type: QUERY -POSTHOOK: Input: default@avro_timestamp -POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -#### A masked pattern was here #### -2012-02-21 07:08:09.123 {"bar":"1998-05-07 07:08:09.123","foo":"1980-12-16 07:08:09.123"} ["2011-09-04 07:08:09.123","2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -2014-02-11 07:08:09.123 {"baz":"1981-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -1947-02-11 07:08:09.123 {"baz":"1921-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -8200-02-11 07:08:09.123 {"baz":"6981-12-16 07:08:09.123"} ["1039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -PREHOOK: query: SELECT d, COUNT(d) FROM avro_timestamp GROUP BY d -PREHOOK: type: QUERY -PREHOOK: Input: default@avro_timestamp -PREHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -#### A masked pattern was here #### -POSTHOOK: query: SELECT d, COUNT(d) FROM avro_timestamp GROUP BY d -POSTHOOK: type: QUERY -POSTHOOK: Input: default@avro_timestamp -POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -#### A masked pattern was here #### -1947-02-11 07:08:09.123 1 -2012-02-21 07:08:09.123 1 -2014-02-11 07:08:09.123 1 -8200-02-11 07:08:09.123 1 -PREHOOK: query: SELECT * FROM avro_timestamp WHERE d!='1947-02-11 07:08:09.123' -PREHOOK: type: QUERY -PREHOOK: Input: default@avro_timestamp -PREHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM avro_timestamp WHERE d!='1947-02-11 07:08:09.123' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@avro_timestamp -POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -#### A masked pattern was here #### -2012-02-21 07:08:09.123 {"bar":"1998-05-07 07:08:09.123","foo":"1980-12-16 07:08:09.123"} ["2011-09-04 07:08:09.123","2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -2014-02-11 07:08:09.123 {"baz":"1981-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -8200-02-11 07:08:09.123 {"baz":"6981-12-16 07:08:09.123"} ["1039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -PREHOOK: query: SELECT * FROM avro_timestamp WHERE d<'2014-12-21 07:08:09.123' -PREHOOK: type: QUERY -PREHOOK: Input: default@avro_timestamp -PREHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -#### A masked pattern was here #### -POSTHOOK: query: SELECT *
FROM avro_timestamp WHERE d<'2014-12-21 07:08:09.123' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@avro_timestamp -POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -#### A masked pattern was here #### -2012-02-21 07:08:09.123 {"bar":"1998-05-07 07:08:09.123","foo":"1980-12-16 07:08:09.123"} ["2011-09-04 07:08:09.123","2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -2014-02-11 07:08:09.123 {"baz":"1981-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -1947-02-11 07:08:09.123 {"baz":"1921-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 -PREHOOK: query: SELECT * FROM avro_timestamp WHERE d>'8000-12-01 07:08:09.123' -PREHOOK: type: QUERY -PREHOOK: Input: default@avro_timestamp -PREHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM avro_timestamp WHERE d>'8000-12-01 07:08:09.123' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@avro_timestamp -POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 -#### A masked pattern was here #### -8200-02-11 07:08:09.123 {"baz":"6981-12-16 07:08:09.123"} ["1039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 diff --git ql/src/test/results/clientpositive/avro_timestamp.q.out ql/src/test/results/clientpositive/avro_timestamp.q.out new file mode 100644 index 0000000000000000000000000000000000000000..868807aef264941575081af6b3dedbd82d83a206 --- /dev/null +++ ql/src/test/results/clientpositive/avro_timestamp.q.out @@ -0,0 +1,132 @@ +PREHOOK: query: -- Exclude test on Windows due to space character being escaped in Hive paths on Windows. +-- EXCLUDE_OS_WINDOWS + +DROP TABLE avro_timestamp_staging +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- Exclude test on Windows due to space character being escaped in Hive paths on Windows. 
+-- EXCLUDE_OS_WINDOWS + +DROP TABLE avro_timestamp_staging +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE avro_timestamp +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE avro_timestamp +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE avro_timestamp_casts +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE avro_timestamp_casts +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE avro_timestamp_staging (d timestamp, m1 map<string,timestamp>, l1 array<timestamp>) + ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' + COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' + STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@avro_timestamp_staging +POSTHOOK: query: CREATE TABLE avro_timestamp_staging (d timestamp, m1 map<string,timestamp>, l1 array<timestamp>) + ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' + COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' + STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_timestamp_staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/avro_timestamp.txt' OVERWRITE INTO TABLE avro_timestamp_staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@avro_timestamp_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/avro_timestamp.txt' OVERWRITE INTO TABLE avro_timestamp_staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@avro_timestamp_staging +PREHOOK: query: CREATE TABLE avro_timestamp (d timestamp, m1 map<string,timestamp>, l1 array<timestamp>) + PARTITIONED BY (p1 int, p2 timestamp) + ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' + COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' + STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@avro_timestamp +POSTHOOK: query: CREATE TABLE avro_timestamp (d timestamp, m1 map<string,timestamp>, l1 array<timestamp>) + PARTITIONED BY (p1 int, p2 timestamp) + ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' + COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' + STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_timestamp +PREHOOK: query: INSERT OVERWRITE TABLE avro_timestamp PARTITION(p1=2, p2='2014-09-26 07:08:09.123') SELECT * FROM avro_timestamp_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@avro_timestamp_staging +PREHOOK: Output: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 +POSTHOOK: query: INSERT OVERWRITE TABLE avro_timestamp PARTITION(p1=2, p2='2014-09-26 07:08:09.123') SELECT * FROM avro_timestamp_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@avro_timestamp_staging +POSTHOOK: Output: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 +POSTHOOK: Lineage: avro_timestamp PARTITION(p1=2,p2=2014-09-26 07:08:09.123).d SIMPLE [(avro_timestamp_staging)avro_timestamp_staging.FieldSchema(name:d, type:timestamp, comment:null), ] +POSTHOOK: Lineage: avro_timestamp PARTITION(p1=2,p2=2014-09-26 07:08:09.123).l1 SIMPLE [(avro_timestamp_staging)avro_timestamp_staging.FieldSchema(name:l1, type:array<timestamp>, comment:null), ] +POSTHOOK: Lineage: avro_timestamp PARTITION(p1=2,p2=2014-09-26 07:08:09.123).m1 SIMPLE [(avro_timestamp_staging)avro_timestamp_staging.FieldSchema(name:m1, type:map<string,timestamp>, comment:null), ] +PREHOOK: query: SELECT * FROM avro_timestamp +PREHOOK: type: QUERY +PREHOOK: Input: default@avro_timestamp +PREHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 +#### A masked pattern was
here #### +POSTHOOK: query: SELECT * FROM avro_timestamp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@avro_timestamp +POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 +#### A masked pattern was here #### +2012-02-21 07:08:09.123 {"bar":"1998-05-07 07:08:09.123","foo":"1980-12-16 07:08:09.123"} ["2011-09-04 07:08:09.123","2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +2014-02-11 07:08:09.123 {"baz":"1981-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +1947-02-11 07:08:09.123 {"baz":"1921-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +8200-02-11 07:08:09.123 {"baz":"6981-12-16 07:08:09.123"} ["1039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +PREHOOK: query: SELECT d, COUNT(d) FROM avro_timestamp GROUP BY d +PREHOOK: type: QUERY +PREHOOK: Input: default@avro_timestamp +PREHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 +#### A masked pattern was here #### +POSTHOOK: query: SELECT d, COUNT(d) FROM avro_timestamp GROUP BY d +POSTHOOK: type: QUERY +POSTHOOK: Input: default@avro_timestamp +POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 +#### A masked pattern was here #### +1947-02-11 07:08:09.123 1 +2012-02-21 07:08:09.123 1 +2014-02-11 07:08:09.123 1 +8200-02-11 07:08:09.123 1 +PREHOOK: query: SELECT * FROM avro_timestamp WHERE d!='1947-02-11 07:08:09.123' +PREHOOK: type: QUERY +PREHOOK: Input: default@avro_timestamp +PREHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM avro_timestamp WHERE d!='1947-02-11 07:08:09.123' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@avro_timestamp +POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 +#### A masked pattern was here #### +2012-02-21 07:08:09.123 {"bar":"1998-05-07 07:08:09.123","foo":"1980-12-16 07:08:09.123"} ["2011-09-04 07:08:09.123","2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +2014-02-11 07:08:09.123 {"baz":"1981-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +8200-02-11 07:08:09.123 {"baz":"6981-12-16 07:08:09.123"} ["1039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +PREHOOK: query: SELECT * FROM avro_timestamp WHERE d<'2014-12-21 07:08:09.123' +PREHOOK: type: QUERY +PREHOOK: Input: default@avro_timestamp +PREHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM avro_timestamp WHERE d<'2014-12-21 07:08:09.123' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@avro_timestamp +POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 +#### A masked pattern was here #### +2012-02-21 07:08:09.123 {"bar":"1998-05-07 07:08:09.123","foo":"1980-12-16 07:08:09.123"} ["2011-09-04 07:08:09.123","2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +2014-02-11 07:08:09.123 {"baz":"1981-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +1947-02-11 07:08:09.123 {"baz":"1921-12-16 07:08:09.123"} ["2011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 +PREHOOK: query: SELECT * FROM avro_timestamp WHERE d>'8000-12-01 07:08:09.123' +PREHOOK: type: QUERY +PREHOOK: Input: default@avro_timestamp +PREHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM avro_timestamp WHERE d>'8000-12-01 07:08:09.123' +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@avro_timestamp +POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123 +#### A masked pattern was here #### +8200-02-11 07:08:09.123 {"baz":"6981-12-16 07:08:09.123"} ["1039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123 diff --git ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.java1.7.out ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.java1.7.out deleted file mode 100644 index 5c40dc47bc537893d38004b4df0ddfa2f3b678ed..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.java1.7.out +++ /dev/null @@ -1,693 +0,0 @@ -PREHOOK: query: -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - -EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -PREHOOK: type: QUERY -POSTHOOK: query: -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - -EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false - TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string), ds (type: string) - outputColumnNames: key, value, ds - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string), ds (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types 
string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /src [a] - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - /srcpart/ds=2008-04-09/hr=11 [b] - /srcpart/ds=2008-04-09/hr=12 [b] - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - filter mappings: - 1 [0, 1] - filter predicates: - 0 - 1 {(VALUE.ds = '2008-04-08')} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: key, value, key0, value0 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), key0 (type: string), value0 (type: string) - outputColumnNames: key, value, key0, value0 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key0) > 15.0) and (UDFToDouble(key0) < 25.0)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns key,value,key0,value0 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -#### A masked pattern was here #### -17 val_17 17 val_17 -17 val_17 17 val_17 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -19 val_19 19 val_19 -19 val_19 19 val_19 -PREHOOK: query: EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE 
a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false - TableScan - alias: b - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, 
string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /src [a] - /srcpart/ds=2008-04-08/hr=11 [b] - 
/srcpart/ds=2008-04-08/hr=12 [b] - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: key, value, key0, value0 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), key0 (type: string), value0 (type: string) - outputColumnNames: key, value, key0, value0 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns key,value,key0,value0 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -17 val_17 17 val_17 -17 val_17 17 val_17 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -19 val_19 19 val_19 -19 val_19 19 val_19 diff --git ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.out ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.out new file mode 100644 index 0000000000000000000000000000000000000000..200b8eec4fe10d7f4148c1c2836908db2f826d16 --- /dev/null +++ ql/src/test/results/clientpositive/cbo_rp_outer_join_ppr.q.out @@ -0,0 +1,691 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +POSTHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND 
b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false + TableScan + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string), ds (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string), ds (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl 
struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /src [a] + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + /srcpart/ds=2008-04-09/hr=11 [b] + /srcpart/ds=2008-04-09/hr=12 [b] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + filter mappings: + 1 [0, 1] + filter predicates: + 0 + 1 {(VALUE.ds = '2008-04-08')} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: key, value, key0, value0 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), key0 (type: string), value0 (type: string) + outputColumnNames: key, value, key0, value0 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0) and (UDFToDouble(key0) > 15.0) and (UDFToDouble(key0) < 25.0)) (type: boolean) + Statistics: Num rows: 27 
Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns key,value,key0,value0 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 +PREHOOK: query: EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + null sort order: a 
+ sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false + TableScan + alias: b + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /src [a] + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: key, value, key0, value0 + Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), key0 (type: string), value0 (type: string) + outputColumnNames: key, value, key0, value0 + Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 10.0) and (UDFToDouble(key) < 20.0)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns key,value,key0,value0 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 diff --git ql/src/test/results/clientpositive/char_udf1.q.java1.7.out ql/src/test/results/clientpositive/char_udf1.q.java1.7.out deleted file mode 100644 index ee1c2aeb8cf8546b239ad4ed60f21853c21b5278..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/char_udf1.q.java1.7.out +++ /dev/null @@ -1,463 +0,0 @@ -PREHOOK: query: drop table char_udf_1 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table char_udf_1 -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table char_udf_1 (c1 string, c2 string, c3 char(10), c4 char(20)) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@char_udf_1 -POSTHOOK: query: create table char_udf_1 (c1 string, c2 string, c3 char(10), c4 char(20)) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@char_udf_1 -PREHOOK: query: insert overwrite table char_udf_1 - select key, value, key, value from src where key = '238' limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@char_udf_1 -POSTHOOK: query: insert overwrite table char_udf_1 - select key, value, key, value from src where key = '238' limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@char_udf_1 -POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - --- UDFs with char support -select - concat(c1, c2), - concat(c3, c4), - concat(c1, c2) = concat(c3, c4) -from 
char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - --- UDFs with char support -select - concat(c1, c2), - concat(c3, c4), - concat(c1, c2) = concat(c3, c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -238val_238 238val_238 true -PREHOOK: query: select - upper(c2), - upper(c4), - upper(c2) = upper(c4) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - upper(c2), - upper(c4), - upper(c2) = upper(c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -VAL_238 VAL_238 true -PREHOOK: query: select - lower(c2), - lower(c4), - lower(c2) = lower(c4) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - lower(c2), - lower(c4), - lower(c2) = lower(c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: -- Scalar UDFs -select - ascii(c2), - ascii(c4), - ascii(c2) = ascii(c4) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: -- Scalar UDFs -select - ascii(c2), - ascii(c4), - ascii(c2) = ascii(c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -118 118 true -PREHOOK: query: select - concat_ws('|', c1, c2), - concat_ws('|', c3, c4), - concat_ws('|', c1, c2) = concat_ws('|', c3, c4) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - concat_ws('|', c1, c2), - concat_ws('|', c3, c4), - concat_ws('|', c1, c2) = concat_ws('|', c3, c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -238|val_238 238|val_238 true -PREHOOK: query: select - decode(encode(c2, 'US-ASCII'), 'US-ASCII'), - decode(encode(c4, 'US-ASCII'), 'US-ASCII'), - decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - decode(encode(c2, 'US-ASCII'), 'US-ASCII'), - decode(encode(c4, 'US-ASCII'), 'US-ASCII'), - decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: select - instr(c2, '_'), - instr(c4, '_'), - instr(c2, '_') = instr(c4, '_') -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - instr(c2, '_'), - instr(c4, '_'), - instr(c2, '_') = instr(c4, '_') -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -4 4 true -PREHOOK: query: select - length(c2), - length(c4), - length(c2) = length(c4) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - 
length(c2), - length(c4), - length(c2) = length(c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -7 7 true -PREHOOK: query: select - locate('a', 'abcdabcd', 3), - locate(cast('a' as char(1)), cast('abcdabcd' as char(10)), 3), - locate('a', 'abcdabcd', 3) = locate(cast('a' as char(1)), cast('abcdabcd' as char(10)), 3) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - locate('a', 'abcdabcd', 3), - locate(cast('a' as char(1)), cast('abcdabcd' as char(10)), 3), - locate('a', 'abcdabcd', 3) = locate(cast('a' as char(1)), cast('abcdabcd' as char(10)), 3) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -5 5 true -PREHOOK: query: select - lpad(c2, 15, ' '), - lpad(c4, 15, ' '), - lpad(c2, 15, ' ') = lpad(c4, 15, ' ') -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - lpad(c2, 15, ' '), - lpad(c4, 15, ' '), - lpad(c2, 15, ' ') = lpad(c4, 15, ' ') -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### - val_238 val_238 true -PREHOOK: query: select - ltrim(c2), - ltrim(c4), - ltrim(c2) = ltrim(c4) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - ltrim(c2), - ltrim(c4), - ltrim(c2) = ltrim(c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: -- In hive wiki page https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF --- we only allow A regexp B, not regexp (A,B). - -select - c2 regexp 'val', - c4 regexp 'val', - (c2 regexp 'val') = (c4 regexp 'val') -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: -- In hive wiki page https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF --- we only allow A regexp B, not regexp (A,B). 
- -select - c2 regexp 'val', - c4 regexp 'val', - (c2 regexp 'val') = (c4 regexp 'val') -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -true true true -PREHOOK: query: select - regexp_extract(c2, 'val_([0-9]+)', 1), - regexp_extract(c4, 'val_([0-9]+)', 1), - regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - regexp_extract(c2, 'val_([0-9]+)', 1), - regexp_extract(c4, 'val_([0-9]+)', 1), - regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -238 238 true -PREHOOK: query: select - regexp_replace(c2, 'val', 'replaced'), - regexp_replace(c4, 'val', 'replaced'), - regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - regexp_replace(c2, 'val', 'replaced'), - regexp_replace(c4, 'val', 'replaced'), - regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -replaced_238 replaced_238 true -PREHOOK: query: select - reverse(c2), - reverse(c4), - reverse(c2) = reverse(c4) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - reverse(c2), - reverse(c4), - reverse(c2) = reverse(c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -832_lav 832_lav true -PREHOOK: query: select - rpad(c2, 15, ' '), - rpad(c4, 15, ' '), - rpad(c2, 15, ' ') = rpad(c4, 15, ' ') -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - rpad(c2, 15, ' '), - rpad(c4, 15, ' '), - rpad(c2, 15, ' ') = rpad(c4, 15, ' ') -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: select - rtrim(c2), - rtrim(c4), - rtrim(c2) = rtrim(c4) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - rtrim(c2), - rtrim(c4), - rtrim(c2) = rtrim(c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: select - sentences('See spot run. See jane run.'), - sentences(cast('See spot run. See jane run.' as char(50))) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - sentences('See spot run. See jane run.'), - sentences(cast('See spot run. See jane run.' 
as char(50))) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -[["See","spot","run"],["See","jane","run"]] [["See","spot","run"],["See","jane","run"]] -PREHOOK: query: select - split(c2, '_'), - split(c4, '_') -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - split(c2, '_'), - split(c4, '_') -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -["val","238"] ["val","238"] -PREHOOK: query: select - str_to_map('a:1,b:2,c:3',',',':'), - str_to_map(cast('a:1,b:2,c:3' as char(20)),',',':') -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - str_to_map('a:1,b:2,c:3',',',':'), - str_to_map(cast('a:1,b:2,c:3' as char(20)),',',':') -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -{"b":"2","a":"1","c":"3"} {"b":"2","a":"1","c":"3"} -PREHOOK: query: select - substr(c2, 1, 3), - substr(c4, 1, 3), - substr(c2, 1, 3) = substr(c4, 1, 3) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - substr(c2, 1, 3), - substr(c4, 1, 3), - substr(c2, 1, 3) = substr(c4, 1, 3) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -val val true -PREHOOK: query: select - trim(c2), - trim(c4), - trim(c2) = trim(c4) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - trim(c2), - trim(c4), - trim(c2) = trim(c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: -- Aggregate Functions -select - compute_stats(c2, 16), - compute_stats(c4, 16) -from char_udf_1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: -- Aggregate Functions -select - compute_stats(c2, 16), - compute_stats(c4, 16) -from char_udf_1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} -PREHOOK: query: select - min(c2), - min(c4) -from char_udf_1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - min(c2), - min(c4) -from char_udf_1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -val_238 val_238 -PREHOOK: query: select - max(c2), - max(c4) -from char_udf_1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - max(c2), - max(c4) -from char_udf_1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -val_238 val_238 -PREHOOK: query: drop table char_udf_1 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@char_udf_1 -PREHOOK: Output: default@char_udf_1 
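-- Note (hedged, illustrative only): the deleted version-specific golden file ending here
-- (presumably the java1.7 variant; its diff header precedes this section) differs from the
-- java1.8 file below only where output depends on Java HashMap iteration order or on stats
-- serialization: it prints str_to_map('a:1,b:2,c:3',',',':') as {"b":"2","a":"1","c":"3"}
-- and includes an ndvbitvector field in compute_stats, while the java1.8 file prints
-- {"a":"1","b":"2","c":"3"} and omits ndvbitvector. A minimal sketch of the
-- order-sensitive call, taken from the queries in this file:
-- select str_to_map('a:1,b:2,c:3',',',':') from char_udf_1 limit 1;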
-POSTHOOK: query: drop table char_udf_1 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@char_udf_1 -POSTHOOK: Output: default@char_udf_1 diff --git ql/src/test/results/clientpositive/char_udf1.q.java1.8.out ql/src/test/results/clientpositive/char_udf1.q.java1.8.out deleted file mode 100644 index 5691a0636a2337556a6d691d60aa35ee2adb4d34..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/char_udf1.q.java1.8.out +++ /dev/null @@ -1,457 +0,0 @@ -PREHOOK: query: drop table char_udf_1 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table char_udf_1 -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table char_udf_1 (c1 string, c2 string, c3 char(10), c4 char(20)) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@char_udf_1 -POSTHOOK: query: create table char_udf_1 (c1 string, c2 string, c3 char(10), c4 char(20)) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@char_udf_1 -PREHOOK: query: insert overwrite table char_udf_1 - select key, value, key, value from src where key = '238' limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@char_udf_1 -POSTHOOK: query: insert overwrite table char_udf_1 - select key, value, key, value from src where key = '238' limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@char_udf_1 -POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - --- UDFs with char support -select - concat(c1, c2), - concat(c3, c4), - concat(c1, c2) = concat(c3, c4) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - --- UDFs with char support -select - concat(c1, c2), - concat(c3, c4), - concat(c1, c2) = concat(c3, c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -238val_238 238val_238 true -PREHOOK: query: select - upper(c2), - upper(c4), - upper(c2) = upper(c4) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - upper(c2), - upper(c4), - upper(c2) = upper(c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -VAL_238 VAL_238 true -PREHOOK: query: select - lower(c2), - lower(c4), - lower(c2) = lower(c4) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - lower(c2), - lower(c4), - lower(c2) = lower(c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: -- Scalar UDFs -select - ascii(c2), - ascii(c4), - ascii(c2) = ascii(c4) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: -- Scalar UDFs -select - ascii(c2), - ascii(c4), - ascii(c2) = 
ascii(c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -118 118 true -PREHOOK: query: select - concat_ws('|', c1, c2), - concat_ws('|', c3, c4), - concat_ws('|', c1, c2) = concat_ws('|', c3, c4) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - concat_ws('|', c1, c2), - concat_ws('|', c3, c4), - concat_ws('|', c1, c2) = concat_ws('|', c3, c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -238|val_238 238|val_238 true -PREHOOK: query: select - decode(encode(c2, 'US-ASCII'), 'US-ASCII'), - decode(encode(c4, 'US-ASCII'), 'US-ASCII'), - decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - decode(encode(c2, 'US-ASCII'), 'US-ASCII'), - decode(encode(c4, 'US-ASCII'), 'US-ASCII'), - decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: select - instr(c2, '_'), - instr(c4, '_'), - instr(c2, '_') = instr(c4, '_') -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - instr(c2, '_'), - instr(c4, '_'), - instr(c2, '_') = instr(c4, '_') -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -4 4 true -PREHOOK: query: select - length(c2), - length(c4), - length(c2) = length(c4) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - length(c2), - length(c4), - length(c2) = length(c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -7 7 true -PREHOOK: query: select - locate('a', 'abcdabcd', 3), - locate(cast('a' as char(1)), cast('abcdabcd' as char(10)), 3), - locate('a', 'abcdabcd', 3) = locate(cast('a' as char(1)), cast('abcdabcd' as char(10)), 3) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - locate('a', 'abcdabcd', 3), - locate(cast('a' as char(1)), cast('abcdabcd' as char(10)), 3), - locate('a', 'abcdabcd', 3) = locate(cast('a' as char(1)), cast('abcdabcd' as char(10)), 3) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -5 5 true -PREHOOK: query: select - lpad(c2, 15, ' '), - lpad(c4, 15, ' '), - lpad(c2, 15, ' ') = lpad(c4, 15, ' ') -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - lpad(c2, 15, ' '), - lpad(c4, 15, ' '), - lpad(c2, 15, ' ') = lpad(c4, 15, ' ') -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### - val_238 val_238 true -PREHOOK: query: select - ltrim(c2), - ltrim(c4), - ltrim(c2) = ltrim(c4) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### 
-POSTHOOK: query: select - ltrim(c2), - ltrim(c4), - ltrim(c2) = ltrim(c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: select - regexp(c2, 'val'), - regexp(c4, 'val'), - regexp(c2, 'val') = regexp(c4, 'val') -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - regexp(c2, 'val'), - regexp(c4, 'val'), - regexp(c2, 'val') = regexp(c4, 'val') -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -true true true -PREHOOK: query: select - regexp_extract(c2, 'val_([0-9]+)', 1), - regexp_extract(c4, 'val_([0-9]+)', 1), - regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - regexp_extract(c2, 'val_([0-9]+)', 1), - regexp_extract(c4, 'val_([0-9]+)', 1), - regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -238 238 true -PREHOOK: query: select - regexp_replace(c2, 'val', 'replaced'), - regexp_replace(c4, 'val', 'replaced'), - regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - regexp_replace(c2, 'val', 'replaced'), - regexp_replace(c4, 'val', 'replaced'), - regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -replaced_238 replaced_238 true -PREHOOK: query: select - reverse(c2), - reverse(c4), - reverse(c2) = reverse(c4) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - reverse(c2), - reverse(c4), - reverse(c2) = reverse(c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -832_lav 832_lav true -PREHOOK: query: select - rpad(c2, 15, ' '), - rpad(c4, 15, ' '), - rpad(c2, 15, ' ') = rpad(c4, 15, ' ') -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - rpad(c2, 15, ' '), - rpad(c4, 15, ' '), - rpad(c2, 15, ' ') = rpad(c4, 15, ' ') -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: select - rtrim(c2), - rtrim(c4), - rtrim(c2) = rtrim(c4) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - rtrim(c2), - rtrim(c4), - rtrim(c2) = rtrim(c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: select - sentences('See spot run. See jane run.'), - sentences(cast('See spot run. See jane run.' 
as char(50))) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - sentences('See spot run. See jane run.'), - sentences(cast('See spot run. See jane run.' as char(50))) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -[["See","spot","run"],["See","jane","run"]] [["See","spot","run"],["See","jane","run"]] -PREHOOK: query: select - split(c2, '_'), - split(c4, '_') -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - split(c2, '_'), - split(c4, '_') -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -["val","238"] ["val","238"] -PREHOOK: query: select - str_to_map('a:1,b:2,c:3',',',':'), - str_to_map(cast('a:1,b:2,c:3' as char(20)),',',':') -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - str_to_map('a:1,b:2,c:3',',',':'), - str_to_map(cast('a:1,b:2,c:3' as char(20)),',',':') -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -{"a":"1","b":"2","c":"3"} {"a":"1","b":"2","c":"3"} -PREHOOK: query: select - substr(c2, 1, 3), - substr(c4, 1, 3), - substr(c2, 1, 3) = substr(c4, 1, 3) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - substr(c2, 1, 3), - substr(c4, 1, 3), - substr(c2, 1, 3) = substr(c4, 1, 3) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -val val true -PREHOOK: query: select - trim(c2), - trim(c4), - trim(c2) = trim(c4) -from char_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - trim(c2), - trim(c4), - trim(c2) = trim(c4) -from char_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: -- Aggregate Functions -select - compute_stats(c2, 16), - compute_stats(c4, 16) -from char_udf_1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: -- Aggregate Functions -select - compute_stats(c2, 16), - compute_stats(c4, 16) -from char_udf_1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1} -PREHOOK: query: select - min(c2), - min(c4) -from char_udf_1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - min(c2), - min(c4) -from char_udf_1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -val_238 val_238 -PREHOOK: query: select - max(c2), - max(c4) -from char_udf_1 -PREHOOK: type: QUERY -PREHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - max(c2), - max(c4) -from char_udf_1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@char_udf_1 -#### A masked pattern was here #### -val_238 val_238 -PREHOOK: query: drop 
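-- Note (hedged): the consolidated char_udf1.q.out added below replaces both
-- version-specific files; judging from the hunks in this diff, it keeps the
-- java1.8-style map ordering ({"a":"1","b":"2","c":"3"}) while restoring the
-- ndvbitvector field that this java1.8 file omits from compute_stats. A sketch of
-- the stats call whose serialized shape differed between the two files:
-- select compute_stats(c2, 16), compute_stats(c4, 16) from char_udf_1;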
table char_udf_1 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@char_udf_1 -PREHOOK: Output: default@char_udf_1 -POSTHOOK: query: drop table char_udf_1 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@char_udf_1 -POSTHOOK: Output: default@char_udf_1 diff --git ql/src/test/results/clientpositive/char_udf1.q.out ql/src/test/results/clientpositive/char_udf1.q.out new file mode 100644 index 0000000000000000000000000000000000000000..d84237a6c3c0e8409d9c56a6e280c9abd0fb6938 --- /dev/null +++ ql/src/test/results/clientpositive/char_udf1.q.out @@ -0,0 +1,459 @@ +PREHOOK: query: drop table char_udf_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_udf_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_udf_1 (c1 string, c2 string, c3 char(10), c4 char(20)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_udf_1 +POSTHOOK: query: create table char_udf_1 (c1 string, c2 string, c3 char(10), c4 char(20)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_udf_1 +PREHOOK: query: insert overwrite table char_udf_1 + select key, value, key, value from src where key = '238' limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@char_udf_1 +POSTHOOK: query: insert overwrite table char_udf_1 + select key, value, key, value from src where key = '238' limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@char_udf_1 +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- UDFs with char support +select + concat(c1, c2), + concat(c3, c4), + concat(c1, c2) = concat(c3, c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: -- UDFs with char support +select + concat(c1, c2), + concat(c3, c4), + concat(c1, c2) = concat(c3, c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +238val_238 238val_238 true +PREHOOK: query: select + upper(c2), + upper(c4), + upper(c2) = upper(c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + upper(c2), + upper(c4), + upper(c2) = upper(c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +VAL_238 VAL_238 true +PREHOOK: query: select + lower(c2), + lower(c4), + lower(c2) = lower(c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + lower(c2), + lower(c4), + lower(c2) = lower(c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +val_238 val_238 true +PREHOOK: query: -- Scalar UDFs +select + ascii(c2), + ascii(c4), + ascii(c2) = ascii(c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: -- Scalar UDFs +select + ascii(c2), + ascii(c4), + 
ascii(c2) = ascii(c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +118 118 true +PREHOOK: query: select + concat_ws('|', c1, c2), + concat_ws('|', c3, c4), + concat_ws('|', c1, c2) = concat_ws('|', c3, c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + concat_ws('|', c1, c2), + concat_ws('|', c3, c4), + concat_ws('|', c1, c2) = concat_ws('|', c3, c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +238|val_238 238|val_238 true +PREHOOK: query: select + decode(encode(c2, 'US-ASCII'), 'US-ASCII'), + decode(encode(c4, 'US-ASCII'), 'US-ASCII'), + decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + decode(encode(c2, 'US-ASCII'), 'US-ASCII'), + decode(encode(c4, 'US-ASCII'), 'US-ASCII'), + decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +val_238 val_238 true +PREHOOK: query: select + instr(c2, '_'), + instr(c4, '_'), + instr(c2, '_') = instr(c4, '_') +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + instr(c2, '_'), + instr(c4, '_'), + instr(c2, '_') = instr(c4, '_') +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +4 4 true +PREHOOK: query: select + length(c2), + length(c4), + length(c2) = length(c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + length(c2), + length(c4), + length(c2) = length(c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +7 7 true +PREHOOK: query: select + locate('a', 'abcdabcd', 3), + locate(cast('a' as char(1)), cast('abcdabcd' as char(10)), 3), + locate('a', 'abcdabcd', 3) = locate(cast('a' as char(1)), cast('abcdabcd' as char(10)), 3) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + locate('a', 'abcdabcd', 3), + locate(cast('a' as char(1)), cast('abcdabcd' as char(10)), 3), + locate('a', 'abcdabcd', 3) = locate(cast('a' as char(1)), cast('abcdabcd' as char(10)), 3) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +5 5 true +PREHOOK: query: select + lpad(c2, 15, ' '), + lpad(c4, 15, ' '), + lpad(c2, 15, ' ') = lpad(c4, 15, ' ') +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + lpad(c2, 15, ' '), + lpad(c4, 15, ' '), + lpad(c2, 15, ' ') = lpad(c4, 15, ' ') +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### + val_238 val_238 true +PREHOOK: query: select + ltrim(c2), + ltrim(c4), + ltrim(c2) = ltrim(c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here 
#### +POSTHOOK: query: select + ltrim(c2), + ltrim(c4), + ltrim(c2) = ltrim(c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +val_238 val_238 true +PREHOOK: query: -- In hive wiki page https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF +-- we only allow A regexp B, not regexp (A,B). + +select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: -- In hive wiki page https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF +-- we only allow A regexp B, not regexp (A,B). + +select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +true true true +PREHOOK: query: select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +238 238 true +PREHOOK: query: select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +replaced_238 replaced_238 true +PREHOOK: query: select + reverse(c2), + reverse(c4), + reverse(c2) = reverse(c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + reverse(c2), + reverse(c4), + reverse(c2) = reverse(c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +832_lav 832_lav true +PREHOOK: query: select + rpad(c2, 15, ' '), + rpad(c4, 15, ' '), + rpad(c2, 15, ' ') = rpad(c4, 15, ' ') +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + rpad(c2, 15, ' '), + rpad(c4, 15, ' '), + rpad(c2, 15, ' ') = rpad(c4, 15, ' ') +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +val_238 val_238 true +PREHOOK: query: select + rtrim(c2), + rtrim(c4), + rtrim(c2) = rtrim(c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + rtrim(c2), + rtrim(c4), + rtrim(c2) = rtrim(c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +val_238 
val_238 true +PREHOOK: query: select + sentences('See spot run. See jane run.'), + sentences(cast('See spot run. See jane run.' as char(50))) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + sentences('See spot run. See jane run.'), + sentences(cast('See spot run. See jane run.' as char(50))) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +[["See","spot","run"],["See","jane","run"]] [["See","spot","run"],["See","jane","run"]] +PREHOOK: query: select + split(c2, '_'), + split(c4, '_') +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + split(c2, '_'), + split(c4, '_') +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +["val","238"] ["val","238"] +PREHOOK: query: select + str_to_map('a:1,b:2,c:3',',',':'), + str_to_map(cast('a:1,b:2,c:3' as char(20)),',',':') +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + str_to_map('a:1,b:2,c:3',',',':'), + str_to_map(cast('a:1,b:2,c:3' as char(20)),',',':') +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +{"a":"1","b":"2","c":"3"} {"a":"1","b":"2","c":"3"} +PREHOOK: query: select + substr(c2, 1, 3), + substr(c4, 1, 3), + substr(c2, 1, 3) = substr(c4, 1, 3) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + substr(c2, 1, 3), + substr(c4, 1, 3), + substr(c2, 1, 3) = substr(c4, 1, 3) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +val val true +PREHOOK: query: select + trim(c2), + trim(c4), + trim(c2) = trim(c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + trim(c2), + trim(c4), + trim(c2) = trim(c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +val_238 val_238 true +PREHOOK: query: -- Aggregate Functions +select + compute_stats(c2, 16), + compute_stats(c4, 16) +from char_udf_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: -- Aggregate Functions +select + compute_stats(c2, 16), + compute_stats(c4, 16) +from char_udf_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} +PREHOOK: query: select + min(c2), + min(c4) +from char_udf_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + min(c2), + min(c4) +from char_udf_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +val_238 val_238 +PREHOOK: query: select + max(c2), + max(c4) +from char_udf_1 +PREHOOK: type: QUERY +PREHOOK: 
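-- Note: the pattern throughout this golden file is that every UDF applied to a
-- CHAR(n) column (c3, c4) yields the same value as applied to its STRING twin
-- (c1, c2), trailing-pad handling included, and each query asserts that equality
-- inline as its third projected column, e.g.:
-- select trim(c2), trim(c4), trim(c2) = trim(c4) from char_udf_1 limit 1;
-- expected row, per the output above: val_238 val_238 true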
Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + max(c2), + max(c4) +from char_udf_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +val_238 val_238 +PREHOOK: query: drop table char_udf_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_udf_1 +PREHOOK: Output: default@char_udf_1 +POSTHOOK: query: drop table char_udf_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_udf_1 +POSTHOOK: Output: default@char_udf_1 diff --git ql/src/test/results/clientpositive/input4.q.java1.7.out ql/src/test/results/clientpositive/input4.q.java1.7.out deleted file mode 100644 index eaeedcb6d229b152e56aeedf3bc83f2fcf64ff73..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/input4.q.java1.7.out +++ /dev/null @@ -1,559 +0,0 @@ -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -CREATE TABLE INPUT4(KEY STRING, VALUE STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@INPUT4 -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -CREATE TABLE INPUT4(KEY STRING, VALUE STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@INPUT4 -PREHOOK: query: EXPLAIN -LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE INPUT4 -PREHOOK: type: LOAD -POSTHOOK: query: EXPLAIN -LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE INPUT4 -POSTHOOK: type: LOAD -STAGE DEPENDENCIES: - Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Move Operator - tables: - replace: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.input4 - - Stage: Stage-1 - Stats-Aggr Operator - -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE INPUT4 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@input4 -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE INPUT4 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@input4 -PREHOOK: query: EXPLAIN FORMATTED -SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN FORMATTED -SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias -POSTHOOK: type: QUERY -{"STAGE DEPENDENCIES":{"Stage-0":{"ROOT STAGE":"TRUE"}},"STAGE PLANS":{"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"TableScan":{"alias:":"input4alias","Statistics:":"Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"value (type: string), key (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE","children":{"ListSink":{}}}}}}}}}} -PREHOOK: query: SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias -PREHOOK: type: QUERY -PREHOOK: Input: default@input4 -#### A masked pattern was here #### -POSTHOOK: query: SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias -POSTHOOK: type: QUERY -POSTHOOK: Input: default@input4 -#### A masked pattern was here #### -val_238 238 -val_86 86 -val_311 311 -val_27 27 -val_165 165 -val_409 409 -val_255 255 -val_278 278 -val_98 98 -val_484 484 -val_265 265 
-val_193 193 -val_401 401 -val_150 150 -val_273 273 -val_224 224 -val_369 369 -val_66 66 -val_128 128 -val_213 213 -val_146 146 -val_406 406 -val_429 429 -val_374 374 -val_152 152 -val_469 469 -val_145 145 -val_495 495 -val_37 37 -val_327 327 -val_281 281 -val_277 277 -val_209 209 -val_15 15 -val_82 82 -val_403 403 -val_166 166 -val_417 417 -val_430 430 -val_252 252 -val_292 292 -val_219 219 -val_287 287 -val_153 153 -val_193 193 -val_338 338 -val_446 446 -val_459 459 -val_394 394 -val_237 237 -val_482 482 -val_174 174 -val_413 413 -val_494 494 -val_207 207 -val_199 199 -val_466 466 -val_208 208 -val_174 174 -val_399 399 -val_396 396 -val_247 247 -val_417 417 -val_489 489 -val_162 162 -val_377 377 -val_397 397 -val_309 309 -val_365 365 -val_266 266 -val_439 439 -val_342 342 -val_367 367 -val_325 325 -val_167 167 -val_195 195 -val_475 475 -val_17 17 -val_113 113 -val_155 155 -val_203 203 -val_339 339 -val_0 0 -val_455 455 -val_128 128 -val_311 311 -val_316 316 -val_57 57 -val_302 302 -val_205 205 -val_149 149 -val_438 438 -val_345 345 -val_129 129 -val_170 170 -val_20 20 -val_489 489 -val_157 157 -val_378 378 -val_221 221 -val_92 92 -val_111 111 -val_47 47 -val_72 72 -val_4 4 -val_280 280 -val_35 35 -val_427 427 -val_277 277 -val_208 208 -val_356 356 -val_399 399 -val_169 169 -val_382 382 -val_498 498 -val_125 125 -val_386 386 -val_437 437 -val_469 469 -val_192 192 -val_286 286 -val_187 187 -val_176 176 -val_54 54 -val_459 459 -val_51 51 -val_138 138 -val_103 103 -val_239 239 -val_213 213 -val_216 216 -val_430 430 -val_278 278 -val_176 176 -val_289 289 -val_221 221 -val_65 65 -val_318 318 -val_332 332 -val_311 311 -val_275 275 -val_137 137 -val_241 241 -val_83 83 -val_333 333 -val_180 180 -val_284 284 -val_12 12 -val_230 230 -val_181 181 -val_67 67 -val_260 260 -val_404 404 -val_384 384 -val_489 489 -val_353 353 -val_373 373 -val_272 272 -val_138 138 -val_217 217 -val_84 84 -val_348 348 -val_466 466 -val_58 58 -val_8 8 -val_411 411 -val_230 230 -val_208 208 -val_348 348 -val_24 24 -val_463 463 -val_431 431 -val_179 179 -val_172 172 -val_42 42 -val_129 129 -val_158 158 -val_119 119 -val_496 496 -val_0 0 -val_322 322 -val_197 197 -val_468 468 -val_393 393 -val_454 454 -val_100 100 -val_298 298 -val_199 199 -val_191 191 -val_418 418 -val_96 96 -val_26 26 -val_165 165 -val_327 327 -val_230 230 -val_205 205 -val_120 120 -val_131 131 -val_51 51 -val_404 404 -val_43 43 -val_436 436 -val_156 156 -val_469 469 -val_468 468 -val_308 308 -val_95 95 -val_196 196 -val_288 288 -val_481 481 -val_457 457 -val_98 98 -val_282 282 -val_197 197 -val_187 187 -val_318 318 -val_318 318 -val_409 409 -val_470 470 -val_137 137 -val_369 369 -val_316 316 -val_169 169 -val_413 413 -val_85 85 -val_77 77 -val_0 0 -val_490 490 -val_87 87 -val_364 364 -val_179 179 -val_118 118 -val_134 134 -val_395 395 -val_282 282 -val_138 138 -val_238 238 -val_419 419 -val_15 15 -val_118 118 -val_72 72 -val_90 90 -val_307 307 -val_19 19 -val_435 435 -val_10 10 -val_277 277 -val_273 273 -val_306 306 -val_224 224 -val_309 309 -val_389 389 -val_327 327 -val_242 242 -val_369 369 -val_392 392 -val_272 272 -val_331 331 -val_401 401 -val_242 242 -val_452 452 -val_177 177 -val_226 226 -val_5 5 -val_497 497 -val_402 402 -val_396 396 -val_317 317 -val_395 395 -val_58 58 -val_35 35 -val_336 336 -val_95 95 -val_11 11 -val_168 168 -val_34 34 -val_229 229 -val_233 233 -val_143 143 -val_472 472 -val_322 322 -val_498 498 -val_160 160 -val_195 195 -val_42 42 -val_321 321 -val_430 430 -val_119 119 -val_489 489 -val_458 458 -val_78 78 -val_76 76 -val_41 41 
-val_223 223 -val_492 492 -val_149 149 -val_449 449 -val_218 218 -val_228 228 -val_138 138 -val_453 453 -val_30 30 -val_209 209 -val_64 64 -val_468 468 -val_76 76 -val_74 74 -val_342 342 -val_69 69 -val_230 230 -val_33 33 -val_368 368 -val_103 103 -val_296 296 -val_113 113 -val_216 216 -val_367 367 -val_344 344 -val_167 167 -val_274 274 -val_219 219 -val_239 239 -val_485 485 -val_116 116 -val_223 223 -val_256 256 -val_263 263 -val_70 70 -val_487 487 -val_480 480 -val_401 401 -val_288 288 -val_191 191 -val_5 5 -val_244 244 -val_438 438 -val_128 128 -val_467 467 -val_432 432 -val_202 202 -val_316 316 -val_229 229 -val_469 469 -val_463 463 -val_280 280 -val_2 2 -val_35 35 -val_283 283 -val_331 331 -val_235 235 -val_80 80 -val_44 44 -val_193 193 -val_321 321 -val_335 335 -val_104 104 -val_466 466 -val_366 366 -val_175 175 -val_403 403 -val_483 483 -val_53 53 -val_105 105 -val_257 257 -val_406 406 -val_409 409 -val_190 190 -val_406 406 -val_401 401 -val_114 114 -val_258 258 -val_90 90 -val_203 203 -val_262 262 -val_348 348 -val_424 424 -val_12 12 -val_396 396 -val_201 201 -val_217 217 -val_164 164 -val_431 431 -val_454 454 -val_478 478 -val_298 298 -val_125 125 -val_431 431 -val_164 164 -val_424 424 -val_187 187 -val_382 382 -val_5 5 -val_70 70 -val_397 397 -val_480 480 -val_291 291 -val_24 24 -val_351 351 -val_255 255 -val_104 104 -val_70 70 -val_163 163 -val_438 438 -val_119 119 -val_414 414 -val_200 200 -val_491 491 -val_237 237 -val_439 439 -val_360 360 -val_248 248 -val_479 479 -val_305 305 -val_417 417 -val_199 199 -val_444 444 -val_120 120 -val_429 429 -val_169 169 -val_443 443 -val_323 323 -val_325 325 -val_277 277 -val_230 230 -val_478 478 -val_178 178 -val_468 468 -val_310 310 -val_317 317 -val_333 333 -val_493 493 -val_460 460 -val_207 207 -val_249 249 -val_265 265 -val_480 480 -val_83 83 -val_136 136 -val_353 353 -val_172 172 -val_214 214 -val_462 462 -val_233 233 -val_406 406 -val_133 133 -val_175 175 -val_189 189 -val_454 454 -val_375 375 -val_401 401 -val_421 421 -val_407 407 -val_384 384 -val_256 256 -val_26 26 -val_134 134 -val_67 67 -val_384 384 -val_379 379 -val_18 18 -val_462 462 -val_492 492 -val_100 100 -val_298 298 -val_9 9 -val_341 341 -val_498 498 -val_146 146 -val_458 458 -val_362 362 -val_186 186 -val_285 285 -val_348 348 -val_167 167 -val_18 18 -val_273 273 -val_183 183 -val_281 281 -val_344 344 -val_97 97 -val_469 469 -val_315 315 -val_84 84 -val_28 28 -val_37 37 -val_448 448 -val_152 152 -val_348 348 -val_307 307 -val_194 194 -val_414 414 -val_477 477 -val_222 222 -val_126 126 -val_90 90 -val_169 169 -val_403 403 -val_400 400 -val_200 200 -val_97 97 diff --git ql/src/test/results/clientpositive/input4.q.java1.8.out ql/src/test/results/clientpositive/input4.q.java1.8.out deleted file mode 100644 index eaeedcb6d229b152e56aeedf3bc83f2fcf64ff73..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/input4.q.java1.8.out +++ /dev/null @@ -1,559 +0,0 @@ -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -CREATE TABLE INPUT4(KEY STRING, VALUE STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@INPUT4 -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -CREATE TABLE INPUT4(KEY STRING, VALUE STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@INPUT4 -PREHOOK: query: EXPLAIN -LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE INPUT4 -PREHOOK: type: LOAD -POSTHOOK: query: EXPLAIN -LOAD DATA LOCAL INPATH 
'../../data/files/kv1.txt' INTO TABLE INPUT4 -POSTHOOK: type: LOAD -STAGE DEPENDENCIES: - Stage-0 is a root stage - Stage-1 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-0 - Move Operator - tables: - replace: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.input4 - - Stage: Stage-1 - Stats-Aggr Operator - -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE INPUT4 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@input4 -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE INPUT4 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@input4 -PREHOOK: query: EXPLAIN FORMATTED -SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN FORMATTED -SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias -POSTHOOK: type: QUERY -{"STAGE DEPENDENCIES":{"Stage-0":{"ROOT STAGE":"TRUE"}},"STAGE PLANS":{"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"TableScan":{"alias:":"input4alias","Statistics:":"Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"value (type: string), key (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE","children":{"ListSink":{}}}}}}}}}} -PREHOOK: query: SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias -PREHOOK: type: QUERY -PREHOOK: Input: default@input4 -#### A masked pattern was here #### -POSTHOOK: query: SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias -POSTHOOK: type: QUERY -POSTHOOK: Input: default@input4 -#### A masked pattern was here #### -val_238 238 -val_86 86 -val_311 311 -val_27 27 -val_165 165 -val_409 409 -val_255 255 -val_278 278 -val_98 98 -val_484 484 -val_265 265 -val_193 193 -val_401 401 -val_150 150 -val_273 273 -val_224 224 -val_369 369 -val_66 66 -val_128 128 -val_213 213 -val_146 146 -val_406 406 -val_429 429 -val_374 374 -val_152 152 -val_469 469 -val_145 145 -val_495 495 -val_37 37 -val_327 327 -val_281 281 -val_277 277 -val_209 209 -val_15 15 -val_82 82 -val_403 403 -val_166 166 -val_417 417 -val_430 430 -val_252 252 -val_292 292 -val_219 219 -val_287 287 -val_153 153 -val_193 193 -val_338 338 -val_446 446 -val_459 459 -val_394 394 -val_237 237 -val_482 482 -val_174 174 -val_413 413 -val_494 494 -val_207 207 -val_199 199 -val_466 466 -val_208 208 -val_174 174 -val_399 399 -val_396 396 -val_247 247 -val_417 417 -val_489 489 -val_162 162 -val_377 377 -val_397 397 -val_309 309 -val_365 365 -val_266 266 -val_439 439 -val_342 342 -val_367 367 -val_325 325 -val_167 167 -val_195 195 -val_475 475 -val_17 17 -val_113 113 -val_155 155 -val_203 203 -val_339 339 -val_0 0 -val_455 455 -val_128 128 -val_311 311 -val_316 316 -val_57 57 -val_302 302 -val_205 205 -val_149 149 -val_438 438 -val_345 345 -val_129 129 -val_170 170 -val_20 20 -val_489 489 -val_157 157 -val_378 378 -val_221 221 -val_92 92 -val_111 111 -val_47 47 -val_72 72 -val_4 4 -val_280 280 -val_35 35 -val_427 427 -val_277 277 -val_208 208 -val_356 356 -val_399 399 -val_169 169 -val_382 382 -val_498 498 -val_125 125 -val_386 386 -val_437 437 -val_469 469 -val_192 192 -val_286 286 -val_187 187 -val_176 176 -val_54 54 -val_459 459 
-val_51 51 -val_138 138 -val_103 103 -val_239 239 -val_213 213 -val_216 216 -val_430 430 -val_278 278 -val_176 176 -val_289 289 -val_221 221 -val_65 65 -val_318 318 -val_332 332 -val_311 311 -val_275 275 -val_137 137 -val_241 241 -val_83 83 -val_333 333 -val_180 180 -val_284 284 -val_12 12 -val_230 230 -val_181 181 -val_67 67 -val_260 260 -val_404 404 -val_384 384 -val_489 489 -val_353 353 -val_373 373 -val_272 272 -val_138 138 -val_217 217 -val_84 84 -val_348 348 -val_466 466 -val_58 58 -val_8 8 -val_411 411 -val_230 230 -val_208 208 -val_348 348 -val_24 24 -val_463 463 -val_431 431 -val_179 179 -val_172 172 -val_42 42 -val_129 129 -val_158 158 -val_119 119 -val_496 496 -val_0 0 -val_322 322 -val_197 197 -val_468 468 -val_393 393 -val_454 454 -val_100 100 -val_298 298 -val_199 199 -val_191 191 -val_418 418 -val_96 96 -val_26 26 -val_165 165 -val_327 327 -val_230 230 -val_205 205 -val_120 120 -val_131 131 -val_51 51 -val_404 404 -val_43 43 -val_436 436 -val_156 156 -val_469 469 -val_468 468 -val_308 308 -val_95 95 -val_196 196 -val_288 288 -val_481 481 -val_457 457 -val_98 98 -val_282 282 -val_197 197 -val_187 187 -val_318 318 -val_318 318 -val_409 409 -val_470 470 -val_137 137 -val_369 369 -val_316 316 -val_169 169 -val_413 413 -val_85 85 -val_77 77 -val_0 0 -val_490 490 -val_87 87 -val_364 364 -val_179 179 -val_118 118 -val_134 134 -val_395 395 -val_282 282 -val_138 138 -val_238 238 -val_419 419 -val_15 15 -val_118 118 -val_72 72 -val_90 90 -val_307 307 -val_19 19 -val_435 435 -val_10 10 -val_277 277 -val_273 273 -val_306 306 -val_224 224 -val_309 309 -val_389 389 -val_327 327 -val_242 242 -val_369 369 -val_392 392 -val_272 272 -val_331 331 -val_401 401 -val_242 242 -val_452 452 -val_177 177 -val_226 226 -val_5 5 -val_497 497 -val_402 402 -val_396 396 -val_317 317 -val_395 395 -val_58 58 -val_35 35 -val_336 336 -val_95 95 -val_11 11 -val_168 168 -val_34 34 -val_229 229 -val_233 233 -val_143 143 -val_472 472 -val_322 322 -val_498 498 -val_160 160 -val_195 195 -val_42 42 -val_321 321 -val_430 430 -val_119 119 -val_489 489 -val_458 458 -val_78 78 -val_76 76 -val_41 41 -val_223 223 -val_492 492 -val_149 149 -val_449 449 -val_218 218 -val_228 228 -val_138 138 -val_453 453 -val_30 30 -val_209 209 -val_64 64 -val_468 468 -val_76 76 -val_74 74 -val_342 342 -val_69 69 -val_230 230 -val_33 33 -val_368 368 -val_103 103 -val_296 296 -val_113 113 -val_216 216 -val_367 367 -val_344 344 -val_167 167 -val_274 274 -val_219 219 -val_239 239 -val_485 485 -val_116 116 -val_223 223 -val_256 256 -val_263 263 -val_70 70 -val_487 487 -val_480 480 -val_401 401 -val_288 288 -val_191 191 -val_5 5 -val_244 244 -val_438 438 -val_128 128 -val_467 467 -val_432 432 -val_202 202 -val_316 316 -val_229 229 -val_469 469 -val_463 463 -val_280 280 -val_2 2 -val_35 35 -val_283 283 -val_331 331 -val_235 235 -val_80 80 -val_44 44 -val_193 193 -val_321 321 -val_335 335 -val_104 104 -val_466 466 -val_366 366 -val_175 175 -val_403 403 -val_483 483 -val_53 53 -val_105 105 -val_257 257 -val_406 406 -val_409 409 -val_190 190 -val_406 406 -val_401 401 -val_114 114 -val_258 258 -val_90 90 -val_203 203 -val_262 262 -val_348 348 -val_424 424 -val_12 12 -val_396 396 -val_201 201 -val_217 217 -val_164 164 -val_431 431 -val_454 454 -val_478 478 -val_298 298 -val_125 125 -val_431 431 -val_164 164 -val_424 424 -val_187 187 -val_382 382 -val_5 5 -val_70 70 -val_397 397 -val_480 480 -val_291 291 -val_24 24 -val_351 351 -val_255 255 -val_104 104 -val_70 70 -val_163 163 -val_438 438 -val_119 119 -val_414 414 -val_200 200 -val_491 491 -val_237 237 
-val_439 439 -val_360 360 -val_248 248 -val_479 479 -val_305 305 -val_417 417 -val_199 199 -val_444 444 -val_120 120 -val_429 429 -val_169 169 -val_443 443 -val_323 323 -val_325 325 -val_277 277 -val_230 230 -val_478 478 -val_178 178 -val_468 468 -val_310 310 -val_317 317 -val_333 333 -val_493 493 -val_460 460 -val_207 207 -val_249 249 -val_265 265 -val_480 480 -val_83 83 -val_136 136 -val_353 353 -val_172 172 -val_214 214 -val_462 462 -val_233 233 -val_406 406 -val_133 133 -val_175 175 -val_189 189 -val_454 454 -val_375 375 -val_401 401 -val_421 421 -val_407 407 -val_384 384 -val_256 256 -val_26 26 -val_134 134 -val_67 67 -val_384 384 -val_379 379 -val_18 18 -val_462 462 -val_492 492 -val_100 100 -val_298 298 -val_9 9 -val_341 341 -val_498 498 -val_146 146 -val_458 458 -val_362 362 -val_186 186 -val_285 285 -val_348 348 -val_167 167 -val_18 18 -val_273 273 -val_183 183 -val_281 281 -val_344 344 -val_97 97 -val_469 469 -val_315 315 -val_84 84 -val_28 28 -val_37 37 -val_448 448 -val_152 152 -val_348 348 -val_307 307 -val_194 194 -val_414 414 -val_477 477 -val_222 222 -val_126 126 -val_90 90 -val_169 169 -val_403 403 -val_400 400 -val_200 200 -val_97 97 diff --git ql/src/test/results/clientpositive/input4.q.out ql/src/test/results/clientpositive/input4.q.out new file mode 100644 index 0000000000000000000000000000000000000000..83912f660886c104c40188c1658505d76c76caf7 --- /dev/null +++ ql/src/test/results/clientpositive/input4.q.out @@ -0,0 +1,555 @@ +PREHOOK: query: CREATE TABLE INPUT4(KEY STRING, VALUE STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@INPUT4 +POSTHOOK: query: CREATE TABLE INPUT4(KEY STRING, VALUE STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@INPUT4 +PREHOOK: query: EXPLAIN +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE INPUT4 +PREHOOK: type: LOAD +POSTHOOK: query: EXPLAIN +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE INPUT4 +POSTHOOK: type: LOAD +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.input4 + + Stage: Stage-1 + Stats-Aggr Operator + +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE INPUT4 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@input4 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE INPUT4 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@input4 +PREHOOK: query: EXPLAIN FORMATTED +SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN FORMATTED +SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias +POSTHOOK: type: QUERY +{"STAGE DEPENDENCIES":{"Stage-0":{"ROOT STAGE":"TRUE"}},"STAGE PLANS":{"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"TableScan":{"alias:":"input4alias","Statistics:":"Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"value (type: string), key (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 29 Data size: 5812 Basic stats: COMPLETE 
Column stats: NONE","children":{"ListSink":{}}}}}}}}}} +PREHOOK: query: SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias +PREHOOK: type: QUERY +PREHOOK: Input: default@input4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias +POSTHOOK: type: QUERY +POSTHOOK: Input: default@input4 +#### A masked pattern was here #### +val_238 238 +val_86 86 +val_311 311 +val_27 27 +val_165 165 +val_409 409 +val_255 255 +val_278 278 +val_98 98 +val_484 484 +val_265 265 +val_193 193 +val_401 401 +val_150 150 +val_273 273 +val_224 224 +val_369 369 +val_66 66 +val_128 128 +val_213 213 +val_146 146 +val_406 406 +val_429 429 +val_374 374 +val_152 152 +val_469 469 +val_145 145 +val_495 495 +val_37 37 +val_327 327 +val_281 281 +val_277 277 +val_209 209 +val_15 15 +val_82 82 +val_403 403 +val_166 166 +val_417 417 +val_430 430 +val_252 252 +val_292 292 +val_219 219 +val_287 287 +val_153 153 +val_193 193 +val_338 338 +val_446 446 +val_459 459 +val_394 394 +val_237 237 +val_482 482 +val_174 174 +val_413 413 +val_494 494 +val_207 207 +val_199 199 +val_466 466 +val_208 208 +val_174 174 +val_399 399 +val_396 396 +val_247 247 +val_417 417 +val_489 489 +val_162 162 +val_377 377 +val_397 397 +val_309 309 +val_365 365 +val_266 266 +val_439 439 +val_342 342 +val_367 367 +val_325 325 +val_167 167 +val_195 195 +val_475 475 +val_17 17 +val_113 113 +val_155 155 +val_203 203 +val_339 339 +val_0 0 +val_455 455 +val_128 128 +val_311 311 +val_316 316 +val_57 57 +val_302 302 +val_205 205 +val_149 149 +val_438 438 +val_345 345 +val_129 129 +val_170 170 +val_20 20 +val_489 489 +val_157 157 +val_378 378 +val_221 221 +val_92 92 +val_111 111 +val_47 47 +val_72 72 +val_4 4 +val_280 280 +val_35 35 +val_427 427 +val_277 277 +val_208 208 +val_356 356 +val_399 399 +val_169 169 +val_382 382 +val_498 498 +val_125 125 +val_386 386 +val_437 437 +val_469 469 +val_192 192 +val_286 286 +val_187 187 +val_176 176 +val_54 54 +val_459 459 +val_51 51 +val_138 138 +val_103 103 +val_239 239 +val_213 213 +val_216 216 +val_430 430 +val_278 278 +val_176 176 +val_289 289 +val_221 221 +val_65 65 +val_318 318 +val_332 332 +val_311 311 +val_275 275 +val_137 137 +val_241 241 +val_83 83 +val_333 333 +val_180 180 +val_284 284 +val_12 12 +val_230 230 +val_181 181 +val_67 67 +val_260 260 +val_404 404 +val_384 384 +val_489 489 +val_353 353 +val_373 373 +val_272 272 +val_138 138 +val_217 217 +val_84 84 +val_348 348 +val_466 466 +val_58 58 +val_8 8 +val_411 411 +val_230 230 +val_208 208 +val_348 348 +val_24 24 +val_463 463 +val_431 431 +val_179 179 +val_172 172 +val_42 42 +val_129 129 +val_158 158 +val_119 119 +val_496 496 +val_0 0 +val_322 322 +val_197 197 +val_468 468 +val_393 393 +val_454 454 +val_100 100 +val_298 298 +val_199 199 +val_191 191 +val_418 418 +val_96 96 +val_26 26 +val_165 165 +val_327 327 +val_230 230 +val_205 205 +val_120 120 +val_131 131 +val_51 51 +val_404 404 +val_43 43 +val_436 436 +val_156 156 +val_469 469 +val_468 468 +val_308 308 +val_95 95 +val_196 196 +val_288 288 +val_481 481 +val_457 457 +val_98 98 +val_282 282 +val_197 197 +val_187 187 +val_318 318 +val_318 318 +val_409 409 +val_470 470 +val_137 137 +val_369 369 +val_316 316 +val_169 169 +val_413 413 +val_85 85 +val_77 77 +val_0 0 +val_490 490 +val_87 87 +val_364 364 +val_179 179 +val_118 118 +val_134 134 +val_395 395 +val_282 282 +val_138 138 +val_238 238 +val_419 419 +val_15 15 +val_118 118 +val_72 72 +val_90 90 +val_307 307 +val_19 19 +val_435 435 +val_10 10 +val_277 277 +val_273 273 +val_306 306 
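-- Note (hedged): the two deleted input4 golden files share the same blob hash
-- (index eaeedcb6d229b...) in their diff headers, so they were byte-identical,
-- EXPLAIN FORMATTED JSON plan included; the consolidation into input4.q.out drops
-- only the JAVA_VERSION_SPECIFIC_OUTPUT marker comment. The flow the test
-- exercises, restated from the queries above:
-- CREATE TABLE INPUT4(KEY STRING, VALUE STRING) STORED AS TEXTFILE;
-- LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE INPUT4;
-- EXPLAIN FORMATTED SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias;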
+val_224 224 +val_309 309 +val_389 389 +val_327 327 +val_242 242 +val_369 369 +val_392 392 +val_272 272 +val_331 331 +val_401 401 +val_242 242 +val_452 452 +val_177 177 +val_226 226 +val_5 5 +val_497 497 +val_402 402 +val_396 396 +val_317 317 +val_395 395 +val_58 58 +val_35 35 +val_336 336 +val_95 95 +val_11 11 +val_168 168 +val_34 34 +val_229 229 +val_233 233 +val_143 143 +val_472 472 +val_322 322 +val_498 498 +val_160 160 +val_195 195 +val_42 42 +val_321 321 +val_430 430 +val_119 119 +val_489 489 +val_458 458 +val_78 78 +val_76 76 +val_41 41 +val_223 223 +val_492 492 +val_149 149 +val_449 449 +val_218 218 +val_228 228 +val_138 138 +val_453 453 +val_30 30 +val_209 209 +val_64 64 +val_468 468 +val_76 76 +val_74 74 +val_342 342 +val_69 69 +val_230 230 +val_33 33 +val_368 368 +val_103 103 +val_296 296 +val_113 113 +val_216 216 +val_367 367 +val_344 344 +val_167 167 +val_274 274 +val_219 219 +val_239 239 +val_485 485 +val_116 116 +val_223 223 +val_256 256 +val_263 263 +val_70 70 +val_487 487 +val_480 480 +val_401 401 +val_288 288 +val_191 191 +val_5 5 +val_244 244 +val_438 438 +val_128 128 +val_467 467 +val_432 432 +val_202 202 +val_316 316 +val_229 229 +val_469 469 +val_463 463 +val_280 280 +val_2 2 +val_35 35 +val_283 283 +val_331 331 +val_235 235 +val_80 80 +val_44 44 +val_193 193 +val_321 321 +val_335 335 +val_104 104 +val_466 466 +val_366 366 +val_175 175 +val_403 403 +val_483 483 +val_53 53 +val_105 105 +val_257 257 +val_406 406 +val_409 409 +val_190 190 +val_406 406 +val_401 401 +val_114 114 +val_258 258 +val_90 90 +val_203 203 +val_262 262 +val_348 348 +val_424 424 +val_12 12 +val_396 396 +val_201 201 +val_217 217 +val_164 164 +val_431 431 +val_454 454 +val_478 478 +val_298 298 +val_125 125 +val_431 431 +val_164 164 +val_424 424 +val_187 187 +val_382 382 +val_5 5 +val_70 70 +val_397 397 +val_480 480 +val_291 291 +val_24 24 +val_351 351 +val_255 255 +val_104 104 +val_70 70 +val_163 163 +val_438 438 +val_119 119 +val_414 414 +val_200 200 +val_491 491 +val_237 237 +val_439 439 +val_360 360 +val_248 248 +val_479 479 +val_305 305 +val_417 417 +val_199 199 +val_444 444 +val_120 120 +val_429 429 +val_169 169 +val_443 443 +val_323 323 +val_325 325 +val_277 277 +val_230 230 +val_478 478 +val_178 178 +val_468 468 +val_310 310 +val_317 317 +val_333 333 +val_493 493 +val_460 460 +val_207 207 +val_249 249 +val_265 265 +val_480 480 +val_83 83 +val_136 136 +val_353 353 +val_172 172 +val_214 214 +val_462 462 +val_233 233 +val_406 406 +val_133 133 +val_175 175 +val_189 189 +val_454 454 +val_375 375 +val_401 401 +val_421 421 +val_407 407 +val_384 384 +val_256 256 +val_26 26 +val_134 134 +val_67 67 +val_384 384 +val_379 379 +val_18 18 +val_462 462 +val_492 492 +val_100 100 +val_298 298 +val_9 9 +val_341 341 +val_498 498 +val_146 146 +val_458 458 +val_362 362 +val_186 186 +val_285 285 +val_348 348 +val_167 167 +val_18 18 +val_273 273 +val_183 183 +val_281 281 +val_344 344 +val_97 97 +val_469 469 +val_315 315 +val_84 84 +val_28 28 +val_37 37 +val_448 448 +val_152 152 +val_348 348 +val_307 307 +val_194 194 +val_414 414 +val_477 477 +val_222 222 +val_126 126 +val_90 90 +val_169 169 +val_403 403 +val_400 400 +val_200 200 +val_97 97 diff --git ql/src/test/results/clientpositive/join0.q.java1.7.out ql/src/test/results/clientpositive/join0.q.java1.7.out deleted file mode 100644 index 343f8a413e58ac1883728add01162e73ae84acbf..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/join0.q.java1.7.out +++ /dev/null @@ -1,240 +0,0 @@ -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in 
Stage 'Stage-1:MAPRED' is a cross product -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT --- SORT_QUERY_RESULTS - -EXPLAIN -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT --- SORT_QUERY_RESULTS - -EXPLAIN -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product -PREHOOK: query: EXPLAIN FORMATTED -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN FORMATTED -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -{"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-1"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: string), _col1 (type: string)"}}}}}}}},{"TableScan":{"alias:":"src","Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: string), _col1 (type: string)"}}}}}}}}],"Reduce Operator Tree:":{"Join Operator":{"condition map:":[{"":"Inner Join 0 to 1"}],"keys:":{},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe"}}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"children":{"Reduce Output Operator":{"key expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)","sort order:":"++++","Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE"}}}}],"Reduce Operator Tree:":{"Select Operator":{"expressions:":"KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: 
NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product -PREHOOK: query: SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 2 val_2 -0 val_0 2 val_2 -0 val_0 2 val_2 -0 val_0 4 val_4 -0 val_0 4 val_4 -0 val_0 4 val_4 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 8 val_8 -0 val_0 8 val_8 -0 val_0 8 val_8 -0 val_0 9 val_9 -0 val_0 9 val_9 -0 val_0 9 val_9 -2 val_2 0 val_0 -2 val_2 0 val_0 -2 val_2 0 val_0 -2 val_2 2 val_2 -2 val_2 4 val_4 -2 val_2 5 val_5 -2 val_2 5 val_5 -2 val_2 5 val_5 -2 val_2 8 val_8 -2 val_2 9 val_9 -4 val_4 0 val_0 -4 val_4 0 val_0 -4 val_4 0 val_0 -4 val_4 2 val_2 -4 val_4 4 val_4 -4 val_4 5 val_5 -4 val_4 5 val_5 -4 val_4 5 val_5 -4 val_4 8 val_8 -4 val_4 9 val_9 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 2 val_2 -5 val_5 2 val_2 -5 val_5 2 val_2 -5 val_5 4 val_4 -5 val_5 4 val_4 -5 val_5 4 val_4 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 8 val_8 -5 val_5 8 val_8 -5 val_5 8 val_8 -5 val_5 9 val_9 -5 val_5 9 val_9 -5 val_5 9 val_9 -8 val_8 0 val_0 -8 val_8 0 val_0 -8 val_8 0 val_0 -8 val_8 2 val_2 -8 val_8 4 val_4 -8 val_8 5 val_5 -8 val_8 5 val_5 -8 val_8 5 val_5 -8 val_8 8 val_8 -8 val_8 9 val_9 -9 val_9 0 val_0 -9 val_9 0 val_0 -9 val_9 0 val_0 -9 val_9 2 val_2 -9 val_9 4 val_4 -9 val_9 5 val_5 -9 val_9 5 val_5 -9 val_9 5 val_5 -9 val_9 8 val_8 -9 val_9 9 val_9 diff --git ql/src/test/results/clientpositive/join0.q.java1.8.out ql/src/test/results/clientpositive/join0.q.java1.8.out deleted file mode 100644 index 343f8a413e58ac1883728add01162e73ae84acbf..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/join0.q.java1.8.out +++ /dev/null @@ -1,240 +0,0 @@ -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT --- SORT_QUERY_RESULTS - -EXPLAIN -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key 
< 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT --- SORT_QUERY_RESULTS - -EXPLAIN -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product -PREHOOK: query: EXPLAIN FORMATTED -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, 
src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN FORMATTED -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -{"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-1"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: string), _col1 (type: string)"}}}}}}}},{"TableScan":{"alias:":"src","Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: string), _col1 (type: string)"}}}}}}}}],"Reduce Operator Tree:":{"Join Operator":{"condition map:":[{"":"Inner Join 0 to 1"}],"keys:":{},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe"}}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"children":{"Reduce Output Operator":{"key expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)","sort order:":"++++","Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE"}}}}],"Reduce Operator Tree:":{"Select Operator":{"expressions:":"KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product -PREHOOK: query: SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 2 val_2 -0 val_0 2 val_2 -0 val_0 2 val_2 -0 val_0 4 val_4 -0 val_0 4 val_4 -0 val_0 4 val_4 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 8 val_8 -0 val_0 8 val_8 -0 val_0 8 val_8 -0 val_0 9 val_9 -0 val_0 9 val_9 -0 val_0 9 val_9 -2 val_2 0 val_0 -2 val_2 0 val_0 -2 val_2 0 val_0 -2 val_2 2 val_2 -2 val_2 4 val_4 -2 val_2 5 val_5 -2 val_2 5 val_5 -2 val_2 5 val_5 -2 val_2 8 val_8 -2 val_2 9 val_9 -4 val_4 0 val_0 -4 val_4 0 val_0 -4 val_4 0 val_0 -4 val_4 2 val_2 -4 val_4 4 val_4 -4 val_4 5 val_5 -4 val_4 5 val_5 -4 val_4 5 val_5 -4 val_4 8 val_8 -4 val_4 9 val_9 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 2 val_2 -5 val_5 2 val_2 -5 val_5 2 val_2 -5 val_5 4 val_4 -5 val_5 4 val_4 -5 val_5 4 val_4 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 8 val_8 -5 val_5 8 val_8 -5 val_5 8 val_8 -5 val_5 9 val_9 -5 val_5 9 val_9 -5 val_5 9 val_9 -8 val_8 0 val_0 -8 val_8 0 val_0 -8 val_8 0 val_0 -8 val_8 2 val_2 -8 val_8 4 val_4 -8 val_8 5 val_5 -8 val_8 5 val_5 -8 val_8 5 val_5 -8 val_8 8 val_8 -8 val_8 9 val_9 -9 val_9 0 val_0 -9 val_9 0 val_0 -9 val_9 0 val_0 -9 val_9 2 val_2 -9 val_9 4 val_4 -9 val_9 5 val_5 -9 val_9 5 val_5 -9 val_9 5 val_5 -9 val_9 8 val_8 -9 val_9 9 val_9 diff --git ql/src/test/results/clientpositive/join0.q.out ql/src/test/results/clientpositive/join0.q.out new file mode 100644 index 0000000000000000000000000000000000000000..59122e25482013255b2aee73a32776b93b3abc71 --- /dev/null +++ ql/src/test/results/clientpositive/join0.q.out @@ -0,0 +1,238 @@ +Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +PREHOOK: type: QUERY +POSTHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT 
BY k1, v1, k2, v2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN FORMATTED +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN FORMATTED +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + 
JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +POSTHOOK: type: QUERY +{"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-1"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: string), _col1 (type: string)"}}}}}}}},{"TableScan":{"alias:":"src","Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: string), _col1 (type: string)"}}}}}}}}],"Reduce Operator Tree:":{"Join Operator":{"condition map:":[{"":"Inner Join 0 to 1"}],"keys:":{},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe"}}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"children":{"Reduce Output Operator":{"key expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)","sort order:":"++++","Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE"}}}}],"Reduce Operator Tree:":{"Select Operator":{"expressions:":"KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} +Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + 
(SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 2 val_2 +0 val_0 2 val_2 +0 val_0 2 val_2 +0 val_0 4 val_4 +0 val_0 4 val_4 +0 val_0 4 val_4 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 8 val_8 +0 val_0 8 val_8 +0 val_0 8 val_8 +0 val_0 9 val_9 +0 val_0 9 val_9 +0 val_0 9 val_9 +2 val_2 0 val_0 +2 val_2 0 val_0 +2 val_2 0 val_0 +2 val_2 2 val_2 +2 val_2 4 val_4 +2 val_2 5 val_5 +2 val_2 5 val_5 +2 val_2 5 val_5 +2 val_2 8 val_8 +2 val_2 9 val_9 +4 val_4 0 val_0 +4 val_4 0 val_0 +4 val_4 0 val_0 +4 val_4 2 val_2 +4 val_4 4 val_4 +4 val_4 5 val_5 +4 val_4 5 val_5 +4 val_4 5 val_5 +4 val_4 8 val_8 +4 val_4 9 val_9 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 2 val_2 +5 val_5 2 val_2 +5 val_5 2 val_2 +5 val_5 4 val_4 +5 val_5 4 val_4 +5 val_5 4 val_4 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 8 val_8 +5 val_5 8 val_8 +5 val_5 8 val_8 +5 val_5 9 val_9 +5 val_5 9 val_9 +5 val_5 9 val_9 +8 val_8 0 val_0 +8 val_8 0 val_0 +8 val_8 0 val_0 +8 val_8 2 val_2 +8 val_8 4 val_4 +8 val_8 5 val_5 +8 val_8 5 val_5 +8 val_8 5 val_5 +8 val_8 8 val_8 +8 val_8 9 val_9 +9 val_9 0 val_0 +9 val_9 0 val_0 +9 val_9 0 val_0 +9 val_9 2 val_2 +9 val_9 4 val_4 +9 val_9 5 val_5 +9 val_9 5 val_5 +9 val_9 5 val_5 +9 val_9 8 val_8 +9 val_9 9 val_9 diff --git ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.7.out ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.7.out deleted file mode 100644 index 8447e860513bebe083714912560a88e74ba4ce08..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.7.out +++ /dev/null @@ -1,361 +0,0 @@ -PREHOOK: query: -- run this test case in minimr to ensure it works in cluster --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key) on ('484','51','103') - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- run this test case in minimr to ensure it works in cluster --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. 
--- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key) on ('484','51','103') - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_static_part -PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part 
- name: default.list_bucketing_static_part - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_static_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_static_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_static_part -ds=2008-04-08/hr=11 -PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_static_part -# col_name 
data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_static_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 - numRows 500 - rawDataSize 4812 - totalSize 5520 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key] -Skewed Values: [[484], [51], [103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484, [103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103, [51]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=51} -Storage Desc Params: - serialization.format 1 diff --git ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.8.out ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.8.out deleted file mode 100644 index d1b9598d9033824dae9e185f1600620a7f605a8c..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_10.q.java1.8.out +++ /dev/null @@ -1,389 +0,0 @@ -PREHOOK: query: -- run this test case in minimr to ensure it works in cluster --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key) on ('484','51','103') - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- run this test case in minimr to ensure it works in cluster --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key) on ('484','51','103') - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_static_part -PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
-explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - src - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - list_bucketing_static_part - TOK_PARTSPEC - TOK_PARTVAL - ds - '2008-04-08' - TOK_PARTVAL - hr - '11' - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - value - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - 
totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string 
- serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_static_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_static_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_static_part -ds=2008-04-08/hr=11 -PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_static_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_static_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 4 - numRows 0 - rawDataSize 0 - totalSize 5520 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: 
[key] -Skewed Values: [[484], [51], [103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103, [51]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=51, [484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484} -Storage Desc Params: - serialization.format 1 diff --git ql/src/test/results/clientpositive/list_bucket_dml_10.q.out ql/src/test/results/clientpositive/list_bucket_dml_10.q.out new file mode 100644 index 0000000000000000000000000000000000000000..d4681b702dad6436da96c5fa1c83ce17ca696b6f --- /dev/null +++ ql/src/test/results/clientpositive/list_bucket_dml_10.q.out @@ -0,0 +1,359 @@ +PREHOOK: query: -- run this test case in minimr to ensure it works in cluster + +-- list bucketing DML: static partition. multiple skewed columns. +-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103/value=val_103: +-- 99 000000_0 +-- 99 000001_0 +-- ds=2008-04-08/hr=11/key=484/value=val_484: +-- 87 000000_0 +-- 87 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key) on ('484','51','103') + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_static_part +POSTHOOK: query: -- run this test case in minimr to ensure it works in cluster + +-- list bucketing DML: static partition. multiple skewed columns. +-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103/value=val_103: +-- 99 000000_0 +-- 99 000001_0 +-- ds=2008-04-08/hr=11/key=484/value=val_484: +-- 87 000000_0 +-- 87 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key) on ('484','51','103') + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_static_part +PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
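For orientation, the golden file added below captures the full EXPLAIN EXTENDED plan and DML result for list bucketing into a static partition. A minimal sketch of the flow it verifies, using the table definition and queries echoed verbatim in the output (src is the standard 500-row test fixture):

create table list_bucketing_static_part (key String, value String)
    partitioned by (ds String, hr String)
    skewed by (key) on ('484','51','103')
    stored as DIRECTORIES
    STORED AS RCFILE;

-- rows with skewed key values are written to dedicated subdirectories of the partition
insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
select key, value from src;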
+explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=11/ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + + Stage: 
Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 11 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-5 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern 
was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- check DML result +show partitions list_bucketing_static_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: -- check DML result +show partitions list_bucketing_static_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_static_part +ds=2008-04-08/hr=11 +PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_static_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: list_bucketing_static_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 4 + numRows 500 + rawDataSize 4812 + totalSize 5520 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key] +Skewed Values: [[484], [51], [103]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103, 
[51]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=51, [484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484} +Storage Desc Params: + serialization.format 1 diff --git ql/src/test/results/clientpositive/list_bucket_dml_11.q.java1.7.out ql/src/test/results/clientpositive/list_bucket_dml_11.q.java1.7.out deleted file mode 100644 index b58d17c1e7b918fe9793ab36033cb02f36d14142..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_11.q.java1.7.out +++ /dev/null @@ -1,329 +0,0 @@ -PREHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (value) on ('val_466','val_287','val_82') - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (value) on ('val_466','val_287','val_82') - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_static_part -PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
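The java1.7 and java1.8 golden files removed in this patch both covered list_bucket_dml_11, which checks that list bucketing still works when the skewed column is not the first table column. A sketch of the DDL and the pruned read under test, with values taken verbatim from the queries echoed in the outputs:

create table list_bucketing_static_part (key String, value String)
    partitioned by (ds String, hr String)
    skewed by (value) on ('val_466','val_287','val_82')
    stored as DIRECTORIES
    STORED AS RCFILE;

-- a point predicate on the skewed column should prune the scan to its subdirectory,
-- e.g. /list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_466
select key, value from list_bucketing_static_part
where ds='2008-04-08' and hr='11' and value = "val_466";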
-explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input 
format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_static_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_static_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_static_part -ds=2008-04-08/hr=11 -PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_static_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_static_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 - numRows 500 - rawDataSize 4812 - totalSize 5522 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [value] -Skewed Values: [[val_466], [val_287], [val_82]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[val_82]=/list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_82, [val_287]=/list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_287, [val_466]=/list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_466} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: explain extended -select 
key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466" -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466" -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - numFiles 4 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 4812 - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 5522 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - Processor Tree: - TableScan - alias: list_bucketing_static_part - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (value = 'val_466') (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), 'val_466' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - ListSink - -PREHOOK: query: select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466" -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466" -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -466 val_466 -466 val_466 -466 val_466 -PREHOOK: query: drop table list_bucketing_static_part -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: drop table list_bucketing_static_part -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Output: 
default@list_bucketing_static_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_11.q.java1.8.out ql/src/test/results/clientpositive/list_bucket_dml_11.q.java1.8.out deleted file mode 100644 index 00a623544267ad23c62c36289ff5ee78ee2b26ce..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_11.q.java1.8.out +++ /dev/null @@ -1,424 +0,0 @@ -PREHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (value) on ('val_466','val_287','val_82') - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (value) on ('val_466','val_287','val_82') - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_static_part -PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
-explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - src - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - list_bucketing_static_part - TOK_PARTSPEC - TOK_PARTVAL - ds - '2008-04-08' - TOK_PARTVAL - hr - '11' - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - value - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - 
name: default.src - Truncated Path -> Alias: - /src [src] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_static_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_static_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_static_part -ds=2008-04-08/hr=11 -PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_static_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_static_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 4 - numRows 500 - rawDataSize 4812 - totalSize 5522 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [value] -Skewed Values: [[val_466], [val_287], [val_82]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[val_287]=/list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_287, 
[val_82]=/list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_82, [val_466]=/list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_466} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: explain extended -select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466" -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466" -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - list_bucketing_static_part - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - value - TOK_WHERE - and - and - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - = - TOK_TABLE_OR_COL - hr - '11' - = - TOK_TABLE_OR_COL - value - "val_466" - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: list_bucketing_static_part - Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (value = 'val_466') (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), 'val_466' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: value=val_466 - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - numFiles 4 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 4812 - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 5522 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name 
default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - Truncated Path -> Alias: - /list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_466 [$hdt$_0:list_bucketing_static_part] - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466" -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466" -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -466 val_466 -466 val_466 -466 val_466 -PREHOOK: query: drop table list_bucketing_static_part -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: drop table list_bucketing_static_part -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Output: default@list_bucketing_static_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_11.q.out ql/src/test/results/clientpositive/list_bucket_dml_11.q.out new file mode 100644 index 0000000000000000000000000000000000000000..ecf54a81a0b009c7a3fdc9248f5ea66ddd40b59d --- /dev/null +++ ql/src/test/results/clientpositive/list_bucket_dml_11.q.out @@ -0,0 +1,327 @@ +PREHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) + +-- list bucketing DML: static partition. multiple skewed columns. + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (value) on ('val_466','val_287','val_82') + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_static_part +POSTHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) + +-- list bucketing DML: static partition. multiple skewed columns. + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (value) on ('val_466','val_287','val_82') + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_static_part +PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
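The consolidated list_bucket_dml_11.q.out added here replaces both per-JDK files. The post-DML checks it records can be sketched as follows (same table and fixture as above; expected values copied from the golden output):

show partitions list_bucketing_static_part;
-- expected: ds=2008-04-08/hr=11

desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11');
-- expected highlights:
--   Stored As SubDirectories: Yes
--   Skewed Columns:           [value]
--   Skewed Values:            [[val_466], [val_287], [val_82]]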
+explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=11/ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 11 + replace: true +#### A masked pattern was here #### + table: + input 
format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- check DML result +show partitions list_bucketing_static_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: -- check DML result +show partitions list_bucketing_static_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_static_part +ds=2008-04-08/hr=11 +PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_static_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: list_bucketing_static_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 4 + numRows 500 + rawDataSize 4812 + totalSize 5522 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [value] +Skewed Values: [[val_466], [val_287], [val_82]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[val_287]=/list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_287, [val_82]=/list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_82, [val_466]=/list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_466} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain extended +select 
key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466" +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + numFiles 4 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 4812 + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + totalSize 5522 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + Processor Tree: + TableScan + alias: list_bucketing_static_part + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 'val_466') (type: boolean) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), 'val_466' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466" +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_static_part +PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_static_part +POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +466 val_466 +466 val_466 +466 val_466 +PREHOOK: query: drop table list_bucketing_static_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@list_bucketing_static_part +PREHOOK: Output: default@list_bucketing_static_part +POSTHOOK: query: drop table list_bucketing_static_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@list_bucketing_static_part +POSTHOOK: Output: 
default@list_bucketing_static_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.7.out ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.7.out deleted file mode 100644 index 0be7f4e8c276d1f6fc68a4f3661c418637fbc16f..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.7.out +++ /dev/null @@ -1,426 +0,0 @@ -PREHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns -create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) - partitioned by (ds String, hr String) - skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_mul_col -POSTHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns -create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) - partitioned by (ds String, hr String) - skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_mul_col -PREHOOK: query: -- list bucketing DML -explain extended -insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') -select 1, key, 1, value, 1 from src -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML -explain extended -insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') -select 1, key, 1, value, 1 from src -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: '1' (type: string), key (type: string), '1' (type: string), value (type: string), '1' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, 
string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_mul_col - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_mul_col - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') -select 1, key, 1, value, 1 from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') -select 1, key, 1, value, 1 from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_mul_col 
PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION [] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION [] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION [] -PREHOOK: query: -- check DML result -show partitions list_bucketing_mul_col -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: query: -- check DML result -show partitions list_bucketing_mul_col -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_mul_col -ds=2008-04-08/hr=11 -PREHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_mul_col -# col_name data_type comment - -col1 string -col2 string -col3 string -col4 string -col5 string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_mul_col -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 - numRows 500 - rawDataSize 6312 - totalSize 7094 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [col2, col4] -Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[82, val_82]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=82/col4=val_82, [466, val_466]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=466/col4=val_466, [287, val_287]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=287/col4=val_287} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: explain extended -select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - numFiles 4 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 6312 - serialization.ddl struct 
list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 7094 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_mul_col - name: default.list_bucketing_mul_col - Processor Tree: - TableScan - alias: list_bucketing_mul_col - Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - ListSink - -PREHOOK: query: select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_mul_col -PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -1 466 1 val_466 1 2008-04-08 11 -1 466 1 val_466 1 2008-04-08 11 -1 466 1 val_466 1 2008-04-08 11 -PREHOOK: query: explain extended -select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - numFiles 4 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 
6312 - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 7094 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_mul_col - name: default.list_bucketing_mul_col - Processor Tree: - TableScan - alias: list_bucketing_mul_col - Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((col2 = '382') and (col4 = 'val_382')) (type: boolean) - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col1 (type: string), '382' (type: string), col3 (type: string), 'val_382' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - ListSink - -PREHOOK: query: select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_mul_col -PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -1 382 1 val_382 1 2008-04-08 11 -1 382 1 val_382 1 2008-04-08 11 -PREHOOK: query: drop table list_bucketing_mul_col -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_mul_col -PREHOOK: Output: default@list_bucketing_mul_col -POSTHOOK: query: drop table list_bucketing_mul_col -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: Output: default@list_bucketing_mul_col diff --git ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.8.out ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.8.out deleted file mode 100644 index 6d2298bcc481d3eaab41b7dec7a1e74cf0d5b655..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_12.q.java1.8.out +++ /dev/null @@ -1,596 +0,0 @@ -PREHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- test where the skewed values are more than 1 say columns no. 
2 and 4 in a table with 5 columns -create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) - partitioned by (ds String, hr String) - skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_mul_col -POSTHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns -create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) - partitioned by (ds String, hr String) - skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_mul_col -PREHOOK: query: -- list bucketing DML -explain extended -insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') -select 1, key, 1, value, 1 from src -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML -explain extended -insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') -select 1, key, 1, value, 1 from src -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - src - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - list_bucketing_mul_col - TOK_PARTSPEC - TOK_PARTVAL - ds - '2008-04-08' - TOK_PARTVAL - hr - '11' - TOK_SELECT - TOK_SELEXPR - 1 - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - 1 - TOK_SELEXPR - TOK_TABLE_OR_COL - value - TOK_SELEXPR - 1 - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: UDFToString(1) (type: string), key (type: string), UDFToString(1) (type: string), value (type: string), UDFToString(1) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: 
default.list_bucketing_mul_col - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [$hdt$_0:src] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_mul_col - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') -select 1, key, 1, value, 1 from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') -select 1, key, 1, value, 1 from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION [] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION [] -POSTHOOK: Lineage: list_bucketing_mul_col 
PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION [] -PREHOOK: query: -- check DML result -show partitions list_bucketing_mul_col -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: query: -- check DML result -show partitions list_bucketing_mul_col -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_mul_col -ds=2008-04-08/hr=11 -PREHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_mul_col -# col_name data_type comment - -col1 string -col2 string -col3 string -col4 string -col5 string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_mul_col -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 4 - numRows 500 - rawDataSize 6312 - totalSize 7094 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [col2, col4] -Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[466, val_466]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=466/col4=val_466, [287, val_287]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=287/col4=val_287, [82, val_82]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=82/col4=val_82} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: explain extended -select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - list_bucketing_mul_col - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - and - and - and - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - = - TOK_TABLE_OR_COL - hr - '11' - = - TOK_TABLE_OR_COL - col2 - "466" - = - TOK_TABLE_OR_COL - col4 - "val_466" - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: list_bucketing_mul_col - Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) - 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:string:string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: col4=val_466 - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - numFiles 4 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 6312 - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 7094 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_mul_col - name: default.list_bucketing_mul_col - Truncated Path -> Alias: - /list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=466/col4=val_466 [list_bucketing_mul_col] - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_mul_col -PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 -#### A masked 
pattern was here #### -1 466 1 val_466 1 2008-04-08 11 -1 466 1 val_466 1 2008-04-08 11 -1 466 1 val_466 1 2008-04-08 11 -PREHOOK: query: explain extended -select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - list_bucketing_mul_col - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - and - and - and - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - = - TOK_TABLE_OR_COL - hr - '11' - = - TOK_TABLE_OR_COL - col2 - "382" - = - TOK_TABLE_OR_COL - col4 - "val_382" - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: list_bucketing_mul_col - Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((col2 = '382') and (col4 = 'val_382')) (type: boolean) - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col1 (type: string), '382' (type: string), col3 (type: string), 'val_382' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:string:string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - numFiles 4 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 6312 - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 7094 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: 
org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_mul_col - name: default.list_bucketing_mul_col - Truncated Path -> Alias: - /list_bucketing_mul_col/ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [list_bucketing_mul_col] - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_mul_col -PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -1 382 1 val_382 1 2008-04-08 11 -1 382 1 val_382 1 2008-04-08 11 -PREHOOK: query: drop table list_bucketing_mul_col -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_mul_col -PREHOOK: Output: default@list_bucketing_mul_col -POSTHOOK: query: drop table list_bucketing_mul_col -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: Output: default@list_bucketing_mul_col diff --git ql/src/test/results/clientpositive/list_bucket_dml_12.q.out ql/src/test/results/clientpositive/list_bucket_dml_12.q.out new file mode 100644 index 0000000000000000000000000000000000000000..0e11f3f3d76e30da937cd4415bd2adcabcf5b215 --- /dev/null +++ ql/src/test/results/clientpositive/list_bucket_dml_12.q.out @@ -0,0 +1,424 @@ +PREHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS + +-- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns +create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) + partitioned by (ds String, hr String) + skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_mul_col +POSTHOOK: query: -- Ensure it works if skewed column is not the first column in the table columns + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS + +-- test where the skewed values are more than 1 say columns no. 
2 and 4 in a table with 5 columns +create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) + partitioned by (ds String, hr String) + skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_mul_col +PREHOOK: query: -- list bucketing DML +explain extended +insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') +select 1, key, 1, value, 1 from src +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML +explain extended +insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') +select 1, key, 1, value, 1 from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: '1' (type: string), key (type: string), '1' (type: string), value (type: string), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=11/ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + 
properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 11 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') +select 1, key, 1, value, 1 from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 +POSTHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') +select 1, key, 1, value, 1 from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION [] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION [] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION [] +PREHOOK: query: -- check DML result +show partitions list_bucketing_mul_col +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: query: -- check DML result +show partitions list_bucketing_mul_col +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_mul_col +ds=2008-04-08/hr=11 +PREHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_mul_col +# col_name data_type comment + +col1 string +col2 string +col3 string +col4 string +col5 string + +# Partition Information +# 
col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: list_bucketing_mul_col +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 4 + numRows 500 + rawDataSize 6312 + totalSize 7094 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [col2, col4] +Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[466, val_466]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=466/col4=val_466, [287, val_287]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=287/col4=val_287, [82, val_82]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=82/col4=val_82} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + numFiles 4 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 6312 + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + totalSize 7094 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + name: default.list_bucketing_mul_col + Processor Tree: + TableScan + alias: list_bucketing_mul_col + Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((col2 = '466') and 
(col4 = 'val_466')) (type: boolean) + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_mul_col +PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +1 466 1 val_466 1 2008-04-08 11 +1 466 1 val_466 1 2008-04-08 11 +1 466 1 val_466 1 2008-04-08 11 +PREHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + numFiles 4 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 6312 + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + totalSize 7094 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + name: default.list_bucketing_mul_col + Processor Tree: + TableScan + alias: list_bucketing_mul_col + Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false 
+ predicate: ((col2 = '382') and (col4 = 'val_382')) (type: boolean) + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: string), '382' (type: string), col3 (type: string), 'val_382' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_mul_col +PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +1 382 1 val_382 1 2008-04-08 11 +1 382 1 val_382 1 2008-04-08 11 +PREHOOK: query: drop table list_bucketing_mul_col +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@list_bucketing_mul_col +PREHOOK: Output: default@list_bucketing_mul_col +POSTHOOK: query: drop table list_bucketing_mul_col +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: Output: default@list_bucketing_mul_col diff --git ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.7.out ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.7.out deleted file mode 100644 index bfce33500eb4c1d471b0a5e15791ab6be8edd682..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.7.out +++ /dev/null @@ -1,337 +0,0 @@ -PREHOOK: query: -- Ensure skewed value map has escaped directory name - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns -create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) - partitioned by (ds String, hr String) - skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_mul_col -POSTHOOK: query: -- Ensure skewed value map has escaped directory name - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- test where the skewed values are more than 1 say columns no. 
2 and 4 in a table with 5 columns -create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) - partitioned by (ds String, hr String) - skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_mul_col -PREHOOK: query: -- list bucketing DML -explain extended -insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') -select 1, key, 1, value, 1 from src -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML -explain extended -insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') -select 1, key, 1, value, 1 from src -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: '1' (type: string), key (type: string), '1' (type: string), value (type: string), '1' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_mul_col - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 2013-01-23+18:00:99 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_mul_col - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') -select 1, key, 1, value, 1 from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -POSTHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') -select 1, key, 1, value, 1 from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col1 EXPRESSION [] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col3 EXPRESSION [] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col5 EXPRESSION [] -PREHOOK: query: -- check DML result -show partitions list_bucketing_mul_col -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: query: -- check DML result -show partitions list_bucketing_mul_col -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_mul_col -ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -PREHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='2013-01-23+18:00:99') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: query: 
desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='2013-01-23+18:00:99') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_mul_col -# col_name data_type comment - -col1 string -col2 string -col3 string -col4 string -col5 string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 2013-01-23+18:00:99] -Database: default -Table: list_bucketing_mul_col -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 - numRows 500 - rawDataSize 6312 - totalSize 7094 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [col2, col4] -Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[82, val_82]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=82/col4=val_82, [466, val_466]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=466/col4=val_466, [287, val_287]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=287/col4=val_287} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: explain extended -select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr 2013-01-23+18:00:99 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - numFiles 4 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 6312 - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 7094 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_mul_col - name: default.list_bucketing_mul_col - Processor Tree: - TableScan - alias: list_bucketing_mul_col - Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '2013-01-23+18:00:99' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - ListSink - -PREHOOK: query: select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_mul_col -PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -#### A masked pattern was here #### -1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 -1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 -1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 -PREHOOK: query: drop table list_bucketing_mul_col -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_mul_col -PREHOOK: Output: default@list_bucketing_mul_col -POSTHOOK: query: drop table list_bucketing_mul_col -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: Output: default@list_bucketing_mul_col diff --git ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.8.out ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.8.out deleted file mode 100644 index f7a103995ca037fc877853ef00e37e28cfe5f027..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_13.q.java1.8.out +++ /dev/null @@ -1,439 +0,0 @@ -PREHOOK: query: -- Ensure skewed value map has escaped directory name - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns -create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) - partitioned by (ds String, hr String) - skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_mul_col -POSTHOOK: query: -- Ensure skewed value map has escaped directory name - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- test where the skewed values are more than 1 say columns no. 
2 and 4 in a table with 5 columns -create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) - partitioned by (ds String, hr String) - skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_mul_col -PREHOOK: query: -- list bucketing DML -explain extended -insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') -select 1, key, 1, value, 1 from src -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML -explain extended -insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') -select 1, key, 1, value, 1 from src -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - src - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - list_bucketing_mul_col - TOK_PARTSPEC - TOK_PARTVAL - ds - '2008-04-08' - TOK_PARTVAL - hr - '2013-01-23+18:00:99' - TOK_SELECT - TOK_SELEXPR - 1 - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - 1 - TOK_SELEXPR - TOK_TABLE_OR_COL - value - TOK_SELEXPR - 1 - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: UDFToString(1) (type: string), key (type: string), UDFToString(1) (type: string), value (type: string), UDFToString(1) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_mul_col - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - 
rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [$hdt$_0:src] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 2013-01-23+18:00:99 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_mul_col - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') -select 1, key, 1, value, 1 from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -POSTHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') -select 1, key, 1, value, 1 from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col1 EXPRESSION [] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col3 EXPRESSION [] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col5 EXPRESSION [] -PREHOOK: query: -- check DML result -show partitions list_bucketing_mul_col -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: query: -- check DML result -show partitions list_bucketing_mul_col -POSTHOOK: type: 
SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_mul_col -ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -PREHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='2013-01-23+18:00:99') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='2013-01-23+18:00:99') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_mul_col -# col_name data_type comment - -col1 string -col2 string -col3 string -col4 string -col5 string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 2013-01-23+18:00:99] -Database: default -Table: list_bucketing_mul_col -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 4 - numRows 500 - rawDataSize 6312 - totalSize 7094 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [col2, col4] -Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[466, val_466]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=466/col4=val_466, [287, val_287]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=287/col4=val_287, [82, val_82]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=82/col4=val_82} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: explain extended -select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - list_bucketing_mul_col - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - and - and - and - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - = - TOK_TABLE_OR_COL - hr - '2013-01-23+18:00:99' - = - TOK_TABLE_OR_COL - col2 - "466" - = - TOK_TABLE_OR_COL - col4 - "val_466" - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: list_bucketing_mul_col - Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '2013-01-23+18:00:99' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - 
NumFilesPerFileSink: 1 - Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:string:string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: col4=val_466 - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr 2013-01-23+18:00:99 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - numFiles 4 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 6312 - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 7094 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns col1,col2,col3,col4,col5 - columns.comments - columns.types string:string:string:string:string -#### A masked pattern was here #### - name default.list_bucketing_mul_col - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_mul_col - name: default.list_bucketing_mul_col - Truncated Path -> Alias: - /list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=466/col4=val_466 [list_bucketing_mul_col] - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_mul_col -PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_mul_col -where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 -#### A masked pattern was here #### -1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 -1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 -1 466 1 val_466 1 
2008-04-08 2013-01-23+18:00:99 -PREHOOK: query: drop table list_bucketing_mul_col -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_mul_col -PREHOOK: Output: default@list_bucketing_mul_col -POSTHOOK: query: drop table list_bucketing_mul_col -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_mul_col -POSTHOOK: Output: default@list_bucketing_mul_col diff --git ql/src/test/results/clientpositive/list_bucket_dml_13.q.out ql/src/test/results/clientpositive/list_bucket_dml_13.q.out new file mode 100644 index 0000000000000000000000000000000000000000..93ebef0967410452d485f6b9e291d04ffa17de79 --- /dev/null +++ ql/src/test/results/clientpositive/list_bucket_dml_13.q.out @@ -0,0 +1,335 @@ +PREHOOK: query: -- Ensure skewed value map has escaped directory name + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS + +-- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns +create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) + partitioned by (ds String, hr String) + skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_mul_col +POSTHOOK: query: -- Ensure skewed value map has escaped directory name + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS + +-- test where the skewed values are more than 1 say columns no. 2 and 4 in a table with 5 columns +create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string) + partitioned by (ds String, hr String) + skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82')) + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_mul_col +PREHOOK: query: -- list bucketing DML +explain extended +insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') +select 1, key, 1, value, 1 from src +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML +explain extended +insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') +select 1, key, 1, value, 1 from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: '1' (type: string), key (type: string), '1' (type: string), value (type: string), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + 
columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 2013-01-23+18:00:99 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') +select 1, key, 1, value, 1 from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: 
default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +POSTHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') +select 1, key, 1, value, 1 from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col1 EXPRESSION [] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col3 EXPRESSION [] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=2013-01-23+18:00:99).col5 EXPRESSION [] +PREHOOK: query: -- check DML result +show partitions list_bucketing_mul_col +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: query: -- check DML result +show partitions list_bucketing_mul_col +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_mul_col +ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +PREHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='2013-01-23+18:00:99') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='2013-01-23+18:00:99') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_mul_col +# col_name data_type comment + +col1 string +col2 string +col3 string +col4 string +col5 string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 2013-01-23+18:00:99] +Database: default +Table: list_bucketing_mul_col +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 4 + numRows 500 + rawDataSize 6312 + totalSize 7094 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [col2, col4] +Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[466, val_466]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=466/col4=val_466, [287, val_287]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=287/col4=val_287, [82, val_82]=/list_bucketing_mul_col/ds=2008-04-08/hr=2013-01-23+18%3A00%3A99/col2=82/col4=val_82} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + 
+STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + partition values: + ds 2008-04-08 + hr 2013-01-23+18:00:99 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + numFiles 4 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 6312 + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + totalSize 7094 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns col1,col2,col3,col4,col5 + columns.comments + columns.types string:string:string:string:string +#### A masked pattern was here #### + name default.list_bucketing_mul_col + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_mul_col + name: default.list_bucketing_mul_col + Processor Tree: + TableScan + alias: list_bucketing_mul_col + Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean) + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '2013-01-23+18:00:99' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_mul_col +PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +#### A masked pattern was here #### +POSTHOOK: query: select * from list_bucketing_mul_col +where ds='2008-04-08' and hr='2013-01-23+18:00:99' and col2 = "466" and col4 = "val_466" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=2013-01-23+18%3A00%3A99 +#### A masked pattern was here #### +1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 +1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 +1 466 1 val_466 1 2008-04-08 2013-01-23+18:00:99 +PREHOOK: query: drop table list_bucketing_mul_col +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@list_bucketing_mul_col +PREHOOK: Output: default@list_bucketing_mul_col +POSTHOOK: query: 
drop table list_bucketing_mul_col +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@list_bucketing_mul_col +POSTHOOK: Output: default@list_bucketing_mul_col diff --git ql/src/test/results/clientpositive/list_bucket_dml_2.q.java1.7.out ql/src/test/results/clientpositive/list_bucket_dml_2.q.java1.7.out deleted file mode 100644 index dcfbec0423737c2fe2f326ce14a936807c416cab..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_2.q.java1.7.out +++ /dev/null @@ -1,591 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_static_part -PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
-explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [srcpart] - /srcpart/ds=2008-04-08/hr=12 [srcpart] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_static_part 
PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_static_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_static_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_static_part -ds=2008-04-08/hr=11 -PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_static_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_static_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 6 - numRows 1000 - rawDataSize 9624 - totalSize 10898 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[103, val_103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103/value=val_103, [484, val_484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484/value=val_484} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -1000 -PREHOOK: query: select count(*) from list_bucketing_static_part -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from list_bucketing_static_part -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -1000 -PREHOOK: query: explain extended -select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: - Partition - 
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - numFiles 6 - numRows 1000 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 9624 - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10898 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - Processor Tree: - TableScan - alias: list_bucketing_static_part - Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - ListSink - -PREHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -484 val_484 2008-04-08 11 -484 val_484 2008-04-08 11 -PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -484 val_484 2008-04-08 11 -484 val_484 2008-04-08 12 -PREHOOK: 
query: -- 51 and val_51 in the table so skewed data for 51 and val_14 should be none --- but query should succeed for 51 or 51 and val_14 -select * from srcpart where ds = '2008-04-08' and key = '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: -- 51 and val_51 in the table so skewed data for 51 and val_14 should be none --- but query should succeed for 51 or 51 and val_14 -select * from srcpart where ds = '2008-04-08' and key = '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -51 val_51 2008-04-08 11 -51 val_51 2008-04-08 11 -51 val_51 2008-04-08 12 -51 val_51 2008-04-08 12 -PREHOOK: query: select * from list_bucketing_static_part where key = '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_static_part where key = '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -51 val_51 2008-04-08 11 -51 val_51 2008-04-08 11 -51 val_51 2008-04-08 11 -51 val_51 2008-04-08 11 -PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '51' and value = 'val_14' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '51' and value = 'val_14' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -PREHOOK: query: select * from list_bucketing_static_part where key = '51' and value = 'val_14' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_static_part where key = '51' and value = 'val_14' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -PREHOOK: query: -- queries with < <= > >= should work for skewed test although we don't benefit from pruning -select count(1) from srcpart where ds = '2008-04-08' and key < '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: -- queries with < <= > >= should work for skewed test although we don't benefit from pruning -select count(1) from srcpart where ds = '2008-04-08' and key < '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -910 -PREHOOK: query: select count(1) from list_bucketing_static_part where key < '51' -PREHOOK: type: 
QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from list_bucketing_static_part where key < '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -910 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key <= '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key <= '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -914 -PREHOOK: query: select count(1) from list_bucketing_static_part where key <= '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from list_bucketing_static_part where key <= '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -914 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key > '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key > '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -86 -PREHOOK: query: select count(1) from list_bucketing_static_part where key > '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from list_bucketing_static_part where key > '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -86 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key >= '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key >= '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -90 -PREHOOK: query: select count(1) from list_bucketing_static_part where key >= '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from 
list_bucketing_static_part where key >= '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -90 -PREHOOK: query: -- clean up -drop table list_bucketing_static_part -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- clean up -drop table list_bucketing_static_part -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Output: default@list_bucketing_static_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_2.q.java1.8.out ql/src/test/results/clientpositive/list_bucket_dml_2.q.java1.8.out deleted file mode 100644 index aeeba03f5b3bec34442ad148e2683c6e1822b58a..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_2.q.java1.8.out +++ /dev/null @@ -1,692 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_static_part -PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
-explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - srcpart - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - list_bucketing_static_part - TOK_PARTSPEC - TOK_PARTVAL - ds - '2008-04-08' - TOK_PARTVAL - hr - '11' - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - value - TOK_WHERE - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - 
serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: 
Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_static_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_static_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_static_part -ds=2008-04-08/hr=11 -PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_static_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_static_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 6 - numRows 1000 - rawDataSize 9624 - totalSize 10898 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484/value=val_484, [103, val_103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103/value=val_103} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -1000 -PREHOOK: query: select count(*) from list_bucketing_static_part -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from list_bucketing_static_part -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -1000 -PREHOOK: query: explain extended -select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select * from 
list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - list_bucketing_static_part - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - and - and - and - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - = - TOK_TABLE_OR_COL - hr - '11' - = - TOK_TABLE_OR_COL - key - '484' - = - TOK_TABLE_OR_COL - value - 'val_484' - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: list_bucketing_static_part - Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: value=val_484 - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - numFiles 6 - numRows 1000 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 9624 - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10898 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe 
-#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - Truncated Path -> Alias: - /list_bucketing_static_part/ds=2008-04-08/hr=11/key=484/value=val_484 [list_bucketing_static_part] - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -484 val_484 2008-04-08 11 -484 val_484 2008-04-08 11 -PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -484 val_484 2008-04-08 11 -484 val_484 2008-04-08 12 -PREHOOK: query: -- 51 and val_51 in the table so skewed data for 51 and val_14 should be none --- but query should succeed for 51 or 51 and val_14 -select * from srcpart where ds = '2008-04-08' and key = '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: -- 51 and val_51 in the table so skewed data for 51 and val_14 should be none --- but query should succeed for 51 or 51 and val_14 -select * from srcpart where ds = '2008-04-08' and key = '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -51 val_51 2008-04-08 11 -51 val_51 2008-04-08 11 -51 val_51 2008-04-08 12 -51 val_51 2008-04-08 12 -PREHOOK: query: select * from list_bucketing_static_part where key = '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_static_part where key = '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -51 val_51 2008-04-08 11 -51 val_51 2008-04-08 11 -51 val_51 2008-04-08 11 -51 val_51 2008-04-08 11 -PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '51' and value = 'val_14' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A 
masked pattern was here #### -POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '51' and value = 'val_14' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -PREHOOK: query: select * from list_bucketing_static_part where key = '51' and value = 'val_14' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_static_part where key = '51' and value = 'val_14' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -PREHOOK: query: -- queries with < <= > >= should work for skewed test although we don't benefit from pruning -select count(1) from srcpart where ds = '2008-04-08' and key < '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: -- queries with < <= > >= should work for skewed test although we don't benefit from pruning -select count(1) from srcpart where ds = '2008-04-08' and key < '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -910 -PREHOOK: query: select count(1) from list_bucketing_static_part where key < '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from list_bucketing_static_part where key < '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -910 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key <= '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key <= '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -914 -PREHOOK: query: select count(1) from list_bucketing_static_part where key <= '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from list_bucketing_static_part where key <= '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -914 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key > '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key > '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -86 -PREHOOK: query: select count(1) from list_bucketing_static_part where key > '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from list_bucketing_static_part where key > '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -86 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key >= '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key >= '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -90 -PREHOOK: query: select count(1) from list_bucketing_static_part where key >= '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from list_bucketing_static_part where key >= '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -90 -PREHOOK: query: -- clean up -drop table list_bucketing_static_part -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- clean up -drop table list_bucketing_static_part -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Output: default@list_bucketing_static_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_2.q.out ql/src/test/results/clientpositive/list_bucket_dml_2.q.out new file mode 100644 index 0000000000000000000000000000000000000000..a29c224047d09fc15053a0ef57b9559b1da01a75 --- /dev/null +++ ql/src/test/results/clientpositive/list_bucket_dml_2.q.out @@ -0,0 +1,589 @@ +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS + +-- list bucketing DML: static partition. multiple skewed columns. 
+-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103/value=val_103: +-- 99 000000_0 +-- 99 000001_0 +-- ds=2008-04-08/hr=11/key=484/value=val_484: +-- 87 000000_0 +-- 87 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_static_part +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS + +-- list bucketing DML: static partition. multiple skewed columns. +-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103/value=val_103: +-- 99 000000_0 +-- 99 000001_0 +-- ds=2008-04-08/hr=11/key=484/value=val_484: +-- 87 000000_0 +-- 87 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_static_part +PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
+explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=11/ + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 11 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_static_part 
PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- check DML result +show partitions list_bucketing_static_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: -- check DML result +show partitions list_bucketing_static_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_static_part +ds=2008-04-08/hr=11 +PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_static_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: list_bucketing_static_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 6 + numRows 1000 + rawDataSize 9624 + totalSize 10898 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key, value] +Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484/value=val_484, [103, val_103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103/value=val_103} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +1000 +PREHOOK: query: select count(*) from list_bucketing_static_part +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_static_part +PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from list_bucketing_static_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_static_part +POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +1000 +PREHOOK: query: explain extended +select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + 
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + numFiles 6 + numRows 1000 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 9624 + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + totalSize 10898 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + Processor Tree: + TableScan + alias: list_bucketing_static_part + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_static_part +PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_static_part +POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +484 val_484 2008-04-08 11 +484 val_484 2008-04-08 11 +PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +484 val_484 2008-04-08 11 +484 val_484 2008-04-08 12 +PREHOOK: 
query: -- 51 and val_51 in the table so skewed data for 51 and val_14 should be none +-- but query should succeed for 51 or 51 and val_14 +select * from srcpart where ds = '2008-04-08' and key = '51' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: -- 51 and val_51 in the table so skewed data for 51 and val_14 should be none +-- but query should succeed for 51 or 51 and val_14 +select * from srcpart where ds = '2008-04-08' and key = '51' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +51 val_51 2008-04-08 11 +51 val_51 2008-04-08 11 +51 val_51 2008-04-08 12 +51 val_51 2008-04-08 12 +PREHOOK: query: select * from list_bucketing_static_part where key = '51' +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_static_part +PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from list_bucketing_static_part where key = '51' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_static_part +POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +51 val_51 2008-04-08 11 +51 val_51 2008-04-08 11 +51 val_51 2008-04-08 11 +51 val_51 2008-04-08 11 +PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '51' and value = 'val_14' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '51' and value = 'val_14' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +PREHOOK: query: select * from list_bucketing_static_part where key = '51' and value = 'val_14' +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_static_part +PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from list_bucketing_static_part where key = '51' and value = 'val_14' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_static_part +POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +PREHOOK: query: -- queries with < <= > >= should work for skewed test although we don't benefit from pruning +select count(1) from srcpart where ds = '2008-04-08' and key < '51' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: -- queries with < <= > >= should work for skewed test although we don't benefit from pruning +select count(1) from srcpart where ds = '2008-04-08' and key < '51' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +910 +PREHOOK: query: select count(1) from list_bucketing_static_part where key < '51' +PREHOOK: type: 
QUERY +PREHOOK: Input: default@list_bucketing_static_part +PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from list_bucketing_static_part where key < '51' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_static_part +POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +910 +PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key <= '51' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key <= '51' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +914 +PREHOOK: query: select count(1) from list_bucketing_static_part where key <= '51' +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_static_part +PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from list_bucketing_static_part where key <= '51' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_static_part +POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +914 +PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key > '51' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key > '51' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +86 +PREHOOK: query: select count(1) from list_bucketing_static_part where key > '51' +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_static_part +PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from list_bucketing_static_part where key > '51' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_static_part +POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +86 +PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key >= '51' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key >= '51' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +90 +PREHOOK: query: select count(1) from list_bucketing_static_part where key >= '51' +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_static_part +PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from 
list_bucketing_static_part where key >= '51' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_static_part +POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +90 +PREHOOK: query: -- clean up +drop table list_bucketing_static_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@list_bucketing_static_part +PREHOOK: Output: default@list_bucketing_static_part +POSTHOOK: query: -- clean up +drop table list_bucketing_static_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@list_bucketing_static_part +POSTHOOK: Output: default@list_bucketing_static_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_4.q.java1.7.out ql/src/test/results/clientpositive/list_bucket_dml_4.q.java1.7.out deleted file mode 100644 index c15c6a29273ecc60947a025bc96069d2f6b08d35..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_4.q.java1.7.out +++ /dev/null @@ -1,813 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. merge. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- after merge --- 142 000000_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 --- after merge --- 118 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. merge. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- after merge --- 142 000000_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 --- after merge --- 118 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_static_part -PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
-explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [srcpart] - /srcpart/ds=2008-04-08/hr=12 [srcpart] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_static_part 
PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: -- check DML result
-show partitions list_bucketing_static_part
-PREHOOK: type: SHOWPARTITIONS
-PREHOOK: Input: default@list_bucketing_static_part
-POSTHOOK: query: -- check DML result
-show partitions list_bucketing_static_part
-POSTHOOK: type: SHOWPARTITIONS
-POSTHOOK: Input: default@list_bucketing_static_part
-ds=2008-04-08/hr=11
-PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11')
-PREHOOK: type: DESCTABLE
-PREHOOK: Input: default@list_bucketing_static_part
-POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11')
-POSTHOOK: type: DESCTABLE
-POSTHOOK: Input: default@list_bucketing_static_part
-# col_name             data_type               comment
-
-key                    string
-value                  string
-
-# Partition Information
-# col_name             data_type               comment
-
-ds                     string
-hr                     string
-
-# Detailed Partition Information
-Partition Value:       [2008-04-08, 11]
-Database:              default
-Table:                 list_bucketing_static_part
-#### A masked pattern was here ####
-Partition Parameters:
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
-       numFiles                6
-       numRows                 1000
-       rawDataSize             9624
-       totalSize               10898
-#### A masked pattern was here ####
-
-# Storage Information
-SerDe Library:         org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-InputFormat:           org.apache.hadoop.hive.ql.io.RCFileInputFormat
-OutputFormat:          org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-Compressed:            No
-Num Buckets:           -1
-Bucket Columns:        []
-Sort Columns:          []
-Stored As SubDirectories:      Yes
-Skewed Columns:        [key, value]
-Skewed Values:         [[484, val_484], [51, val_14], [103, val_103]]
-#### A masked pattern was here ####
-Skewed Value to Truncated Path:        {[103, val_103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103/value=val_103, [484, val_484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484/value=val_484}
-Storage Desc Params:
-       serialization.format    1
-PREHOOK: query: -- list bucketing DML with merge. use bucketize to generate a few small files.
-explain extended
-insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
-select key, value from srcpart where ds = '2008-04-08'
-PREHOOK: type: QUERY
-POSTHOOK: query: -- list bucketing DML with merge. use bucketize to generate a few small files.
-explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: 
default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [srcpart] - /srcpart/ds=2008-04-08/hr=12 [srcpart] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 
-POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11
-POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: -- check DML result
-show partitions list_bucketing_static_part
-PREHOOK: type: SHOWPARTITIONS
-PREHOOK: Input: default@list_bucketing_static_part
-POSTHOOK: query: -- check DML result
-show partitions list_bucketing_static_part
-POSTHOOK: type: SHOWPARTITIONS
-POSTHOOK: Input: default@list_bucketing_static_part
-ds=2008-04-08/hr=11
-PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11')
-PREHOOK: type: DESCTABLE
-PREHOOK: Input: default@list_bucketing_static_part
-POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11')
-POSTHOOK: type: DESCTABLE
-POSTHOOK: Input: default@list_bucketing_static_part
-# col_name             data_type               comment
-
-key                    string
-value                  string
-
-# Partition Information
-# col_name             data_type               comment
-
-ds                     string
-hr                     string
-
-# Detailed Partition Information
-Partition Value:       [2008-04-08, 11]
-Database:              default
-Table:                 list_bucketing_static_part
-#### A masked pattern was here ####
-Partition Parameters:
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
-       numFiles                4
-       numRows                 1000
-       rawDataSize             9624
-       totalSize               10786
-#### A masked pattern was here ####
-
-# Storage Information
-SerDe Library:         org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-InputFormat:           org.apache.hadoop.hive.ql.io.RCFileInputFormat
-OutputFormat:          org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-Compressed:            No
-Num Buckets:           -1
-Bucket Columns:        []
-Sort Columns:          []
-Stored As SubDirectories:      Yes
-Skewed Columns:        [key, value]
-Skewed Values:         [[484, val_484], [51, val_14], [103, val_103]]
-#### A masked pattern was here ####
-Skewed Value to Truncated Path:        {[103, val_103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103/value=val_103, [484, val_484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484/value=val_484}
-Storage Desc Params:
-       serialization.format    1
-PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-#### A masked pattern was here ####
-POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-#### A masked pattern was here ####
-1000
-PREHOOK: query: select count(*) from list_bucketing_static_part
-PREHOOK: type: QUERY
-PREHOOK: Input: default@list_bucketing_static_part
-PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from list_bucketing_static_part
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@list_bucketing_static_part
-POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11
-#### A masked pattern was here ####
-1000
-PREHOOK: query: explain extended
-select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484'
-PREHOOK: type: QUERY
-POSTHOOK: query: explain extended
-select
* from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - numFiles 4 - numRows 1000 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 9624 - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10786 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - Processor Tree: - TableScan - alias: list_bucketing_static_part - Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - ListSink - -PREHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -484 val_484 2008-04-08 11 -484 val_484 2008-04-08 11 -PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value 
= 'val_484'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-#### A masked pattern was here ####
-484    val_484 2008-04-08      11
-484    val_484 2008-04-08      12
-PREHOOK: query: -- clean up
-drop table list_bucketing_static_part
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@list_bucketing_static_part
-PREHOOK: Output: default@list_bucketing_static_part
-POSTHOOK: query: -- clean up
-drop table list_bucketing_static_part
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@list_bucketing_static_part
-POSTHOOK: Output: default@list_bucketing_static_part
diff --git ql/src/test/results/clientpositive/list_bucket_dml_4.q.java1.8.out ql/src/test/results/clientpositive/list_bucket_dml_4.q.java1.8.out
deleted file mode 100644
index d484626cbe454e343888082be44cddaa14fd6511..0000000000000000000000000000000000000000
--- ql/src/test/results/clientpositive/list_bucket_dml_4.q.java1.8.out
+++ /dev/null
@@ -1,915 +0,0 @@
-PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
--- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- list bucketing DML: static partition. multiple skewed columns. merge.
--- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
--- 5263 000000_0
--- 5263 000001_0
--- ds=2008-04-08/hr=11/key=103/value=val_103:
--- 99 000000_0
--- 99 000001_0
--- after merge
--- 142 000000_0
--- ds=2008-04-08/hr=11/key=484/value=val_484:
--- 87 000000_0
--- 87 000001_0
--- after merge
--- 118 000001_0
-
--- create a skewed table
-create table list_bucketing_static_part (key String, value String)
-    partitioned by (ds String, hr String)
-    skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103'))
-    stored as DIRECTORIES
-    STORED AS RCFILE
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@list_bucketing_static_part
-POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
--- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
--- list bucketing DML: static partition. multiple skewed columns. merge.
--- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
--- 5263 000000_0
--- 5263 000001_0
--- ds=2008-04-08/hr=11/key=103/value=val_103:
--- 99 000000_0
--- 99 000001_0
--- after merge
--- 142 000000_0
--- ds=2008-04-08/hr=11/key=484/value=val_484:
--- 87 000000_0
--- 87 000001_0
--- after merge
--- 118 000001_0
-
--- create a skewed table
-create table list_bucketing_static_part (key String, value String)
-    partitioned by (ds String, hr String)
-    skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103'))
-    stored as DIRECTORIES
-    STORED AS RCFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@list_bucketing_static_part
-PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files.
-explain extended
-insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
-select key, value from srcpart where ds = '2008-04-08'
-PREHOOK: type: QUERY
-POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files.
-explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - srcpart - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - list_bucketing_static_part - TOK_PARTSPEC - TOK_PARTVAL - ds - '2008-04-08' - TOK_PARTVAL - hr - '11' - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - value - TOK_WHERE - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - 
serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [srcpart] - /srcpart/ds=2008-04-08/hr=12 [srcpart] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: 
default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_static_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_static_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_static_part -ds=2008-04-08/hr=11 -PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_static_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_static_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 6 - numRows 1000 - rawDataSize 9624 - totalSize 10898 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484/value=val_484, [103, val_103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103/value=val_103} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: -- list bucketing DML with merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML with merge. use bucketize to generate a few small files. 
-explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - srcpart - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - list_bucketing_static_part - TOK_PARTSPEC - TOK_PARTVAL - ds - '2008-04-08' - TOK_PARTVAL - hr - '11' - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - value - TOK_WHERE - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - 
partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [srcpart] - /srcpart/ds=2008-04-08/hr=12 [srcpart] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern 
was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') 
-select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_static_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_static_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_static_part -ds=2008-04-08/hr=11 -PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_static_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_static_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 4 - numRows 1000 - rawDataSize 9624 - totalSize 10786 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484/value=val_484, [103, val_103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103/value=val_103} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -1000 -PREHOOK: query: select count(*) from list_bucketing_static_part -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from list_bucketing_static_part -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -1000 -PREHOOK: query: 
explain extended -select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - list_bucketing_static_part - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - and - and - and - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - = - TOK_TABLE_OR_COL - hr - '11' - = - TOK_TABLE_OR_COL - key - '484' - = - TOK_TABLE_OR_COL - value - 'val_484' - - -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - numFiles 4 - numRows 1000 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 9624 - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10786 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - Processor Tree: - TableScan - alias: list_bucketing_static_part - Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - ListSink - -PREHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -POSTHOOK: Input: 
default@list_bucketing_static_part
-POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11
-#### A masked pattern was here ####
-484    val_484 2008-04-08      11
-484    val_484 2008-04-08      11
-PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-#### A masked pattern was here ####
-POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-#### A masked pattern was here ####
-484    val_484 2008-04-08      11
-484    val_484 2008-04-08      12
-PREHOOK: query: -- clean up
-drop table list_bucketing_static_part
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@list_bucketing_static_part
-PREHOOK: Output: default@list_bucketing_static_part
-POSTHOOK: query: -- clean up
-drop table list_bucketing_static_part
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@list_bucketing_static_part
-POSTHOOK: Output: default@list_bucketing_static_part
diff --git ql/src/test/results/clientpositive/list_bucket_dml_4.q.out ql/src/test/results/clientpositive/list_bucket_dml_4.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..5f0406a9eda453cad20c8b3f3b0a6ae47ba07376
--- /dev/null
+++ ql/src/test/results/clientpositive/list_bucket_dml_4.q.out
@@ -0,0 +1,811 @@
+PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- SORT_QUERY_RESULTS
+
+-- list bucketing DML: static partition. multiple skewed columns. merge.
+-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
+-- 5263 000000_0
+-- 5263 000001_0
+-- ds=2008-04-08/hr=11/key=103/value=val_103:
+-- 99 000000_0
+-- 99 000001_0
+-- after merge
+-- 142 000000_0
+-- ds=2008-04-08/hr=11/key=484/value=val_484:
+-- 87 000000_0
+-- 87 000001_0
+-- after merge
+-- 118 000001_0
+
+-- create a skewed table
+create table list_bucketing_static_part (key String, value String)
+    partitioned by (ds String, hr String)
+    skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103'))
+    stored as DIRECTORIES
+    STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@list_bucketing_static_part
+POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- SORT_QUERY_RESULTS
+
+-- list bucketing DML: static partition. multiple skewed columns. merge.
+-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME:
+-- 5263 000000_0
+-- 5263 000001_0
+-- ds=2008-04-08/hr=11/key=103/value=val_103:
+-- 99 000000_0
+-- 99 000001_0
+-- after merge
+-- 142 000000_0
+-- ds=2008-04-08/hr=11/key=484/value=val_484:
+-- 87 000000_0
+-- 87 000001_0
+-- after merge
+-- 118 000001_0
+
+-- create a skewed table
+create table list_bucketing_static_part (key String, value String)
+    partitioned by (ds String, hr String)
+    skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103'))
+    stored as DIRECTORIES
+    STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@list_bucketing_static_part
+PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files.
+explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=11/ + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 11 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 
+POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- check DML result +show partitions list_bucketing_static_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: -- check DML result +show partitions list_bucketing_static_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_static_part +ds=2008-04-08/hr=11 +PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_static_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: list_bucketing_static_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 6 + numRows 1000 + rawDataSize 9624 + totalSize 10898 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key, value] +Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484/value=val_484, [103, val_103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103/value=val_103} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- list bucketing DML with merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML with merge. use bucketize to generate a few small files. 
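(Relative to the first plan, the "with merge" plan below adds a Conditional Operator (Stage-7), two RCFile block-merge stages (Stage-3, Stage-5), and their Move stages (Stage-4, Stage-6). In Hive qtests this is normally toggled with the small-file merge settings; the exact values set by list_bucket_dml_4.q are not visible in this diff, so the following is a sketch under that assumption:

-- assumed settings for the "with merge" half of the test
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
-- and, by symmetry, both presumably set to false for the "without merge" half above
)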
+explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=11/ + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 11 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-5 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 
+POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- check DML result +show partitions list_bucketing_static_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: -- check DML result +show partitions list_bucketing_static_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_static_part +ds=2008-04-08/hr=11 +PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_static_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: list_bucketing_static_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 4 + numRows 1000 + rawDataSize 9624 + totalSize 10786 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key, value] +Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484/value=val_484, [103, val_103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103/value=val_103} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +1000 +PREHOOK: query: select count(*) from list_bucketing_static_part +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_static_part +PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from list_bucketing_static_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_static_part +POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +1000 +PREHOOK: query: explain extended +select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select 
* from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + numFiles 4 + numRows 1000 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 9624 + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + totalSize 10786 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + Processor Tree: + TableScan + alias: list_bucketing_static_part + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_static_part +PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_static_part +POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +484 val_484 2008-04-08 11 +484 val_484 2008-04-08 11 +PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value 
= 'val_484' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +484 val_484 2008-04-08 11 +484 val_484 2008-04-08 12 +PREHOOK: query: -- clean up +drop table list_bucketing_static_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@list_bucketing_static_part +PREHOOK: Output: default@list_bucketing_static_part +POSTHOOK: query: -- clean up +drop table list_bucketing_static_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@list_bucketing_static_part +POSTHOOK: Output: default@list_bucketing_static_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_5.q.java1.7.out ql/src/test/results/clientpositive/list_bucket_dml_5.q.java1.7.out deleted file mode 100644 index a0947b21d03e20cc22d712d08d10d12b4f237815..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_5.q.java1.7.out +++ /dev/null @@ -1,506 +0,0 @@ -PREHOOK: query: -- list bucketing DML: multiple skewed columns. 2 stages - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- create a skewed table -create table list_bucketing_dynamic_part (key String, value String) -partitioned by (ds String, hr String) -skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) -stored as DIRECTORIES -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_dynamic_part -POSTHOOK: query: -- list bucketing DML: multiple skewed columns. 2 stages - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- create a skewed table -create table list_bucketing_dynamic_part (key String, value String) -partitioned by (ds String, hr String) -skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) -stored as DIRECTORIES -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_dynamic_part -PREHOOK: query: -- list bucketing DML -explain extended -insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML -explain extended -insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - 
columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.list_bucketing_dynamic_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - 
serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [srcpart] - /srcpart/ds=2008-04-08/hr=12 [srcpart] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.list_bucketing_dynamic_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08 -POSTHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: -- check DML result -desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_dynamic_part -#### A masked pattern was here #### 
-Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 3 - numRows 500 - rawDataSize 5312 - totalSize 5812 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[103, val_103]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=11/key=103/value=val_103, [484, val_484]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=11/key=484/value=val_484} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='12') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='12') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 12] -Database: default -Table: list_bucketing_dynamic_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 3 - numRows 500 - rawDataSize 5312 - totalSize 5812 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[103, val_103]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=12/key=103/value=val_103, [484, val_484]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=12/key=484/value=val_484} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: select count(1) from srcpart where ds='2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds='2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -1000 -PREHOOK: query: select count(1) from list_bucketing_dynamic_part where ds='2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=11 -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from list_bucketing_dynamic_part where ds='2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=11 
-POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -1000 -PREHOOK: query: select key, value from srcpart where ds='2008-04-08' and key = "103" and value ="val_103" -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select key, value from srcpart where ds='2008-04-08' and key = "103" and value ="val_103" -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -103 val_103 -103 val_103 -103 val_103 -103 val_103 -PREHOOK: query: explain extended -select key, value, ds, hr from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103" -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select key, value, ds, hr from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103" -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - numFiles 3 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.list_bucketing_dynamic_part - name: default.list_bucketing_dynamic_part - Partition - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - numFiles 3 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - 
totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.list_bucketing_dynamic_part - name: default.list_bucketing_dynamic_part - Processor Tree: - TableScan - alias: list_bucketing_dynamic_part - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '103') and (value = 'val_103')) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '103' (type: string), 'val_103' (type: string), '2008-04-08' (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - ListSink - -PREHOOK: query: select key, value, ds, hr from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103" -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=11 -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select key, value, ds, hr from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103" -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -103 val_103 2008-04-08 11 -103 val_103 2008-04-08 11 -103 val_103 2008-04-08 12 -103 val_103 2008-04-08 12 -PREHOOK: query: -- clean up resources -drop table list_bucketing_dynamic_part -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Output: default@list_bucketing_dynamic_part -POSTHOOK: query: -- clean up resources -drop table list_bucketing_dynamic_part -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Output: default@list_bucketing_dynamic_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_5.q.java1.8.out ql/src/test/results/clientpositive/list_bucket_dml_5.q.java1.8.out deleted file mode 100644 index 1c33382b8d2bcfb5f2eb8daec4253b252fbb5a5b..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_5.q.java1.8.out +++ /dev/null @@ -1,617 +0,0 @@ -PREHOOK: query: -- list bucketing DML: multiple skewed columns. 
2 stages - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- create a skewed table -create table list_bucketing_dynamic_part (key String, value String) -partitioned by (ds String, hr String) -skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) -stored as DIRECTORIES -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_dynamic_part -POSTHOOK: query: -- list bucketing DML: multiple skewed columns. 2 stages - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- create a skewed table -create table list_bucketing_dynamic_part (key String, value String) -partitioned by (ds String, hr String) -skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) -stored as DIRECTORIES -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_dynamic_part -PREHOOK: query: -- list bucketing DML -explain extended -insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML -explain extended -insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - srcpart - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - list_bucketing_dynamic_part - TOK_PARTSPEC - TOK_PARTVAL - ds - '2008-04-08' - TOK_PARTVAL - hr - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - value - TOK_SELEXPR - TOK_TABLE_OR_COL - hr - TOK_WHERE - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.list_bucketing_dynamic_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: 
-#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types 
string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.list_bucketing_dynamic_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08 -POSTHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: -- check DML result -desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_dynamic_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 3 - numRows 500 - rawDataSize 5312 - totalSize 5812 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=11/key=484/value=val_484, [103, 
val_103]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=11/key=103/value=val_103} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='12') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='12') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 12] -Database: default -Table: list_bucketing_dynamic_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 3 - numRows 500 - rawDataSize 5312 - totalSize 5812 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=12/key=484/value=val_484, [103, val_103]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=12/key=103/value=val_103} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: select count(1) from srcpart where ds='2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds='2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -1000 -PREHOOK: query: select count(1) from list_bucketing_dynamic_part where ds='2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=11 -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from list_bucketing_dynamic_part where ds='2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -1000 -PREHOOK: query: select key, value from srcpart where ds='2008-04-08' and key = "103" and value ="val_103" -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select key, value from srcpart where ds='2008-04-08' and key = "103" and value ="val_103" -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -103 val_103 -103 val_103 -103 val_103 -103 
val_103 -PREHOOK: query: explain extended -select key, value, ds, hr from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103" -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select key, value, ds, hr from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103" -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - list_bucketing_dynamic_part - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - value - TOK_SELEXPR - TOK_TABLE_OR_COL - ds - TOK_SELEXPR - TOK_TABLE_OR_COL - hr - TOK_WHERE - and - and - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - = - TOK_TABLE_OR_COL - key - "103" - = - TOK_TABLE_OR_COL - value - "val_103" - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: list_bucketing_dynamic_part - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '103') and (value = 'val_103')) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '103' (type: string), 'val_103' (type: string), '2008-04-08' (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: value=val_103 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - numFiles 3 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string 
-#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.list_bucketing_dynamic_part - name: default.list_bucketing_dynamic_part -#### A masked pattern was here #### - Partition - base file name: value=val_103 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - numFiles 3 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.list_bucketing_dynamic_part - name: default.list_bucketing_dynamic_part - Truncated Path -> Alias: - /list_bucketing_dynamic_part/ds=2008-04-08/hr=11/key=103/value=val_103 [list_bucketing_dynamic_part] - /list_bucketing_dynamic_part/ds=2008-04-08/hr=12/key=103/value=val_103 [list_bucketing_dynamic_part] - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select key, value, ds, hr from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103" -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=11 -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select key, value, ds, hr from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103" -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -103 val_103 2008-04-08 11 -103 val_103 2008-04-08 11 -103 val_103 2008-04-08 12 -103 val_103 2008-04-08 12 -PREHOOK: query: -- clean up resources -drop table list_bucketing_dynamic_part -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Output: default@list_bucketing_dynamic_part 
-POSTHOOK: query: -- clean up resources -drop table list_bucketing_dynamic_part -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Output: default@list_bucketing_dynamic_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_5.q.out ql/src/test/results/clientpositive/list_bucket_dml_5.q.out new file mode 100644 index 0000000000000000000000000000000000000000..09cb847c21dce919027fa87b9c4cee5b1421d98a --- /dev/null +++ ql/src/test/results/clientpositive/list_bucket_dml_5.q.out @@ -0,0 +1,504 @@ +PREHOOK: query: -- list bucketing DML: multiple skewed columns. 2 stages + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS + +-- create a skewed table +create table list_bucketing_dynamic_part (key String, value String) +partitioned by (ds String, hr String) +skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) +stored as DIRECTORIES +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_dynamic_part +POSTHOOK: query: -- list bucketing DML: multiple skewed columns. 2 stages + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS + +-- create a skewed table +create table list_bucketing_dynamic_part (key String, value String) +partitioned by (ds String, hr String) +skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) +stored as DIRECTORIES +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_dynamic_part +PREHOOK: query: -- list bucketing DML +explain extended +insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML +explain extended +insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/ + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.list_bucketing_dynamic_part + TotalFiles: 
1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr + replace: true +#### A masked pattern was here #### + 
table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.list_bucketing_dynamic_part + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08 +POSTHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- check DML result +desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: query: -- check DML result +desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_dynamic_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: list_bucketing_dynamic_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 3 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key, value] +Skewed Values: [[484, 
val_484], [51, val_14], [103, val_103]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=11/key=484/value=val_484, [103, val_103]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=11/key=103/value=val_103} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='12') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='12') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_dynamic_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 12] +Database: default +Table: list_bucketing_dynamic_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 3 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key, value] +Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=12/key=484/value=val_484, [103, val_103]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=12/key=103/value=val_103} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select count(1) from srcpart where ds='2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from srcpart where ds='2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +1000 +PREHOOK: query: select count(1) from list_bucketing_dynamic_part where ds='2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_dynamic_part +PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=11 +PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from list_bucketing_dynamic_part where ds='2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +1000 +PREHOOK: query: select key, value from srcpart where ds='2008-04-08' and key = "103" and value ="val_103" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key, value from srcpart where ds='2008-04-08' and key = "103" and value ="val_103" +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +103 val_103 +103 val_103 +103 val_103 +103 val_103 +PREHOOK: query: explain extended +select key, value, ds, hr from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select key, value, ds, hr from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103" +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + numFiles 3 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.list_bucketing_dynamic_part + name: default.list_bucketing_dynamic_part + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + numFiles 3 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_dynamic_part { string 
key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.list_bucketing_dynamic_part + name: default.list_bucketing_dynamic_part + Processor Tree: + TableScan + alias: list_bucketing_dynamic_part + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '103') and (value = 'val_103')) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '103' (type: string), 'val_103' (type: string), '2008-04-08' (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select key, value, ds, hr from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103" +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_dynamic_part +PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=11 +PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, ds, hr from list_bucketing_dynamic_part where ds='2008-04-08' and key = "103" and value ="val_103" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +103 val_103 2008-04-08 11 +103 val_103 2008-04-08 11 +103 val_103 2008-04-08 12 +103 val_103 2008-04-08 12 +PREHOOK: query: -- clean up resources +drop table list_bucketing_dynamic_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@list_bucketing_dynamic_part +PREHOOK: Output: default@list_bucketing_dynamic_part +POSTHOOK: query: -- clean up resources +drop table list_bucketing_dynamic_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: Output: default@list_bucketing_dynamic_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_6.q.java1.7.out ql/src/test/results/clientpositive/list_bucket_dml_6.q.java1.7.out deleted file mode 100644 index c022618439ef7ef3b4c339339f227147357d615f..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_6.q.java1.7.out +++ /dev/null @@ -1,1007 +0,0 @@ -PREHOOK: query: -- list bucketing DML: dynamic partition. multiple skewed columns. merge. 
--- The following explains merge example used in this test case --- DML will generate 2 partitions --- ds=2008-04-08/hr=a1 --- ds=2008-04-08/hr=b1 --- without merge, each partition has more files --- ds=2008-04-08/hr=a1 has 2 files --- ds=2008-04-08/hr=b1 has 6 files --- with merge, each partition has fewer files --- ds=2008-04-08/hr=a1 has 1 file --- ds=2008-04-08/hr=b1 has 4 files --- The following shows file size and name in each directory --- hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- without merge --- 155 000000_0 --- 155 000001_0 --- with merge --- 254 000000_0 --- hr=b1/key=103/value=val_103: --- without merge --- 99 000000_0 --- 99 000001_0 --- with merge --- 142 000001_0 --- hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- without merge --- 5181 000000_0 --- 5181 000001_0 --- with merge --- 5181 000000_0 --- 5181 000001_0 --- hr=b1/key=484/value=val_484 --- without merge --- 87 000000_0 --- 87 000001_0 --- with merge --- 118 000002_0 - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- create a skewed table -create table list_bucketing_dynamic_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_dynamic_part -POSTHOOK: query: -- list bucketing DML: dynamic partition. multiple skewed columns. merge. --- The following explains merge example used in this test case --- DML will generate 2 partitions --- ds=2008-04-08/hr=a1 --- ds=2008-04-08/hr=b1 --- without merge, each partition has more files --- ds=2008-04-08/hr=a1 has 2 files --- ds=2008-04-08/hr=b1 has 6 files --- with merge, each partition has fewer files --- ds=2008-04-08/hr=a1 has 1 file --- ds=2008-04-08/hr=b1 has 4 files --- The following shows file size and name in each directory --- hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- without merge --- 155 000000_0 --- 155 000001_0 --- with merge --- 254 000000_0 --- hr=b1/key=103/value=val_103: --- without merge --- 99 000000_0 --- 99 000001_0 --- with merge --- 142 000001_0 --- hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- without merge --- 5181 000000_0 --- 5181 000001_0 --- with merge --- 5181 000000_0 --- 5181 000001_0 --- hr=b1/key=484/value=val_484 --- without merge --- 87 000000_0 --- 87 000001_0 --- with merge --- 118 000002_0 - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- create a skewed table -create table list_bucketing_dynamic_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_dynamic_part -PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
-explain extended -insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0) = 0.0), 'a1', 'b1') (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A 
masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [srcpart] - /srcpart/ds=2008-04-08/hr=12 [srcpart] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08 -POSTHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -POSTHOOK: 
Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=a1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=a1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_dynamic_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_dynamic_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_dynamic_part -ds=2008-04-08/hr=a1 -ds=2008-04-08/hr=b1 -PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, a1] -Database: default -Table: list_bucketing_dynamic_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 2 - numRows 16 - rawDataSize 136 - totalSize 310 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, b1] -Database: default -Table: list_bucketing_dynamic_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 6 - numRows 984 - rawDataSize 9488 - totalSize 10734 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, 
val_103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[103, val_103]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=103/value=val_103, [484, val_484]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=484/value=val_484} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: -- list bucketing DML with merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML with merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0) = 0.0), 'a1', 'b1') (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns hr - partition_columns.types string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [srcpart] - /srcpart/ds=2008-04-08/hr=12 [srcpart] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns hr - partition_columns.types string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - - Stage: Stage-2 - 
Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns hr - partition_columns.types string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns hr - partition_columns.types string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - name: default.list_bucketing_dynamic_part - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns hr - partition_columns.types string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns hr - partition_columns.types string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - name: default.list_bucketing_dynamic_part - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Truncated Path -> Alias: -#### A 
masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08 -POSTHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=a1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=a1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_dynamic_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_dynamic_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_dynamic_part -ds=2008-04-08/hr=a1 -ds=2008-04-08/hr=b1 -PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, a1] -Database: default -Table: list_bucketing_dynamic_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 1 - numRows 16 - rawDataSize 136 - totalSize 254 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: 
default@list_bucketing_dynamic_part -POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, b1] -Database: default -Table: list_bucketing_dynamic_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 - numRows 984 - rawDataSize 9488 - totalSize 10622 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[103, val_103]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=103/value=val_103, [484, val_484]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=484/value=val_484} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -1000 -PREHOOK: query: select count(*) from list_bucketing_dynamic_part -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from list_bucketing_dynamic_part -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -#### A masked pattern was here #### -1000 -PREHOOK: query: explain extended -select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr a1 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - numFiles 1 - numRows 16 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 136 - 
serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 254 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - name: default.list_bucketing_dynamic_part - Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr b1 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - numFiles 4 - numRows 984 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 9488 - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10622 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - name: default.list_bucketing_dynamic_part - Processor Tree: - TableScan - alias: list_bucketing_dynamic_part - Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), ds (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - ListSink - -PREHOOK: query: select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -PREHOOK: Input: 
default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -#### A masked pattern was here #### -484 val_484 2008-04-08 b1 -484 val_484 2008-04-08 b1 -PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -484 val_484 2008-04-08 11 -484 val_484 2008-04-08 12 -PREHOOK: query: -- clean up -drop table list_bucketing_dynamic_part -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Output: default@list_bucketing_dynamic_part -POSTHOOK: query: -- clean up -drop table list_bucketing_dynamic_part -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Output: default@list_bucketing_dynamic_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_6.q.java1.8.out ql/src/test/results/clientpositive/list_bucket_dml_6.q.java1.8.out deleted file mode 100644 index 1960d4155bcfe69ccd227348365041667b829080..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_6.q.java1.8.out +++ /dev/null @@ -1,1119 +0,0 @@ -PREHOOK: query: -- list bucketing DML: dynamic partition. multiple skewed columns. merge. 
--- The following explains the merge example used in this test case --- DML will generate 2 partitions --- ds=2008-04-08/hr=a1 --- ds=2008-04-08/hr=b1 --- without merge, each partition has more files --- ds=2008-04-08/hr=a1 has 2 files --- ds=2008-04-08/hr=b1 has 6 files --- with merge, each partition has fewer files --- ds=2008-04-08/hr=a1 has 1 file --- ds=2008-04-08/hr=b1 has 4 files --- The following shows file size and name in each directory --- hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- without merge --- 155 000000_0 --- 155 000001_0 --- with merge --- 254 000000_0 --- hr=b1/key=103/value=val_103: --- without merge --- 99 000000_0 --- 99 000001_0 --- with merge --- 142 000001_0 --- hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- without merge --- 5181 000000_0 --- 5181 000001_0 --- with merge --- 5181 000000_0 --- 5181 000001_0 --- hr=b1/key=484/value=val_484 --- without merge --- 87 000000_0 --- 87 000001_0 --- with merge --- 118 000002_0 - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- create a skewed table -create table list_bucketing_dynamic_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_dynamic_part -POSTHOOK: query: -- list bucketing DML: dynamic partition. multiple skewed columns. merge. --- The following explains the merge example used in this test case --- DML will generate 2 partitions --- ds=2008-04-08/hr=a1 --- ds=2008-04-08/hr=b1 --- without merge, each partition has more files --- ds=2008-04-08/hr=a1 has 2 files --- ds=2008-04-08/hr=b1 has 6 files --- with merge, each partition has fewer files --- ds=2008-04-08/hr=a1 has 1 file --- ds=2008-04-08/hr=b1 has 4 files --- The following shows file size and name in each directory --- hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- without merge --- 155 000000_0 --- 155 000001_0 --- with merge --- 254 000000_0 --- hr=b1/key=103/value=val_103: --- without merge --- 99 000000_0 --- 99 000001_0 --- with merge --- 142 000001_0 --- hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- without merge --- 5181 000000_0 --- 5181 000001_0 --- with merge --- 5181 000000_0 --- 5181 000001_0 --- hr=b1/key=484/value=val_484 --- without merge --- 87 000000_0 --- 87 000001_0 --- with merge --- 118 000002_0 - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- create a skewed table -create table list_bucketing_dynamic_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_dynamic_part -PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files.
-explain extended -insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - srcpart - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - list_bucketing_dynamic_part - TOK_PARTSPEC - TOK_PARTVAL - ds - '2008-04-08' - TOK_PARTVAL - hr - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - value - TOK_SELEXPR - TOK_FUNCTION - if - == - % - TOK_TABLE_OR_COL - key - 100 - 0 - 'a1' - 'b1' - TOK_WHERE - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0) = 0.0), 'a1', 'b1') (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern 
was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [srcpart] - /srcpart/ds=2008-04-08/hr=12 [srcpart] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08 -POSTHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart 
where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=a1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=a1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_dynamic_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_dynamic_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_dynamic_part -ds=2008-04-08/hr=a1 -ds=2008-04-08/hr=b1 -PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, a1] -Database: default -Table: list_bucketing_dynamic_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 2 - numRows 16 - rawDataSize 136 - totalSize 310 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, b1] -Database: default -Table: list_bucketing_dynamic_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 6 - numRows 984 - rawDataSize 9488 - totalSize 10734 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe 
-InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=484/value=val_484, [103, val_103]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=103/value=val_103} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: -- list bucketing DML with merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML with merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - srcpart - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - list_bucketing_dynamic_part - TOK_PARTSPEC - TOK_PARTVAL - ds - '2008-04-08' - TOK_PARTVAL - hr - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - value - TOK_SELEXPR - TOK_FUNCTION - if - == - % - TOK_TABLE_OR_COL - key - 100 - 0 - 'a1' - 'b1' - TOK_WHERE - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0) = 0.0), 'a1', 'b1') (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns hr - partition_columns.types string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A 
masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [srcpart] - /srcpart/ds=2008-04-08/hr=12 [srcpart] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr - replace: true -#### A masked pattern was here #### - table: - input format: 
org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns hr - partition_columns.types string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns hr - partition_columns.types string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns hr - partition_columns.types string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - name: default.list_bucketing_dynamic_part - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns hr - partition_columns.types string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: 
- bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns hr - partition_columns.types string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - name: default.list_bucketing_dynamic_part - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08 -POSTHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=a1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=a1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_dynamic_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_dynamic_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_dynamic_part -ds=2008-04-08/hr=a1 -ds=2008-04-08/hr=b1 -PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, a1] -Database: default -Table: list_bucketing_dynamic_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 1 - numRows 16 - rawDataSize 136 - totalSize 254 
-#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, b1] -Database: default -Table: list_bucketing_dynamic_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 4 - numRows 984 - rawDataSize 9488 - totalSize 10622 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=484/value=val_484, [103, val_103]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=103/value=val_103} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -1000 -PREHOOK: query: select count(*) from list_bucketing_dynamic_part -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from list_bucketing_dynamic_part -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -#### A masked pattern was here #### -1000 -PREHOOK: query: explain extended -select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - 
TOK_TABREF - TOK_TABNAME - list_bucketing_dynamic_part - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - and - = - TOK_TABLE_OR_COL - key - '484' - = - TOK_TABLE_OR_COL - value - 'val_484' - - -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr a1 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - numFiles 1 - numRows 16 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 136 - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 254 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - name: default.list_bucketing_dynamic_part - Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr b1 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - numFiles 4 - numRows 984 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 9488 - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10622 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - name: default.list_bucketing_dynamic_part - Processor Tree: - TableScan - alias: list_bucketing_dynamic_part - 
Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), ds (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - ListSink - -PREHOOK: query: select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -#### A masked pattern was here #### -484 val_484 2008-04-08 b1 -484 val_484 2008-04-08 b1 -PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -484 val_484 2008-04-08 11 -484 val_484 2008-04-08 12 -PREHOOK: query: -- clean up -drop table list_bucketing_dynamic_part -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Output: default@list_bucketing_dynamic_part -POSTHOOK: query: -- clean up -drop table list_bucketing_dynamic_part -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Output: default@list_bucketing_dynamic_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_6.q.out ql/src/test/results/clientpositive/list_bucket_dml_6.q.out new file mode 100644 index 0000000000000000000000000000000000000000..e53fee7623cc20c975e5ba28f8357aeac4d97681 --- /dev/null +++ ql/src/test/results/clientpositive/list_bucket_dml_6.q.out @@ -0,0 +1,1005 @@ +PREHOOK: query: -- list bucketing DML: dynamic partition. multiple skewed columns. merge. 
+-- The following explains the merge example used in this test case +-- DML will generate 2 partitions +-- ds=2008-04-08/hr=a1 +-- ds=2008-04-08/hr=b1 +-- without merge, each partition has more files +-- ds=2008-04-08/hr=a1 has 2 files +-- ds=2008-04-08/hr=b1 has 6 files +-- with merge, each partition has fewer files +-- ds=2008-04-08/hr=a1 has 1 file +-- ds=2008-04-08/hr=b1 has 4 files +-- The following shows file size and name in each directory +-- hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- without merge +-- 155 000000_0 +-- 155 000001_0 +-- with merge +-- 254 000000_0 +-- hr=b1/key=103/value=val_103: +-- without merge +-- 99 000000_0 +-- 99 000001_0 +-- with merge +-- 142 000001_0 +-- hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- without merge +-- 5181 000000_0 +-- 5181 000001_0 +-- with merge +-- 5181 000000_0 +-- 5181 000001_0 +-- hr=b1/key=484/value=val_484 +-- without merge +-- 87 000000_0 +-- 87 000001_0 +-- with merge +-- 118 000002_0 + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS + +-- create a skewed table +create table list_bucketing_dynamic_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_dynamic_part +POSTHOOK: query: -- list bucketing DML: dynamic partition. multiple skewed columns. merge. +-- The following explains the merge example used in this test case +-- DML will generate 2 partitions +-- ds=2008-04-08/hr=a1 +-- ds=2008-04-08/hr=b1 +-- without merge, each partition has more files +-- ds=2008-04-08/hr=a1 has 2 files +-- ds=2008-04-08/hr=b1 has 6 files +-- with merge, each partition has fewer files +-- ds=2008-04-08/hr=a1 has 1 file +-- ds=2008-04-08/hr=b1 has 4 files +-- The following shows file size and name in each directory +-- hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- without merge +-- 155 000000_0 +-- 155 000001_0 +-- with merge +-- 254 000000_0 +-- hr=b1/key=103/value=val_103: +-- without merge +-- 99 000000_0 +-- 99 000001_0 +-- with merge +-- 142 000001_0 +-- hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- without merge +-- 5181 000000_0 +-- 5181 000001_0 +-- with merge +-- 5181 000000_0 +-- 5181 000001_0 +-- hr=b1/key=484/value=val_484 +-- without merge +-- 87 000000_0 +-- 87 000001_0 +-- with merge +-- 118 000002_0 + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS + +-- create a skewed table +create table list_bucketing_dynamic_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_dynamic_part +PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) +select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files.
+explain extended +insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) +select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0) = 0.0), 'a1', 'b1') (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/ + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_dynamic_part + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A 
masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_dynamic_part + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) +select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08 +POSTHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) +select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 +POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 +POSTHOOK: 
Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=a1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=a1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- check DML result +show partitions list_bucketing_dynamic_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: query: -- check DML result +show partitions list_bucketing_dynamic_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_dynamic_part +ds=2008-04-08/hr=a1 +ds=2008-04-08/hr=b1 +PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_dynamic_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, a1] +Database: default +Table: list_bucketing_dynamic_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 2 + numRows 16 + rawDataSize 136 + totalSize 310 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key, value] +Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_dynamic_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, b1] +Database: default +Table: list_bucketing_dynamic_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 6 + numRows 984 + rawDataSize 9488 + totalSize 10734 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key, value] +Skewed Values: [[484, val_484], [51, val_14], [103, 
val_103]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=484/value=val_484, [103, val_103]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=103/value=val_103} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- list bucketing DML with merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) +select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML with merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) +select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0) = 0.0), 'a1', 'b1') (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/ + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + partition_columns hr + partition_columns.types string + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_dynamic_part + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + partition_columns hr + partition_columns.types string + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_dynamic_part + + Stage: Stage-2 + 
Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + partition_columns hr + partition_columns.types string + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + partition_columns hr + partition_columns.types string + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_dynamic_part + name: default.list_bucketing_dynamic_part + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-5 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + partition_columns hr + partition_columns.types string + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + partition_columns hr + partition_columns.types string + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_dynamic_part + name: default.list_bucketing_dynamic_part + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Truncated Path -> Alias: +#### A 
masked pattern was here #### + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) +select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08 +POSTHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) +select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 +POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 +POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=a1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=a1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- check DML result +show partitions list_bucketing_dynamic_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: query: -- check DML result +show partitions list_bucketing_dynamic_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_dynamic_part +ds=2008-04-08/hr=a1 +ds=2008-04-08/hr=b1 +PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_dynamic_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, a1] +Database: default +Table: list_bucketing_dynamic_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 1 + numRows 16 + rawDataSize 136 + totalSize 254 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key, value] +Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: 
default@list_bucketing_dynamic_part +POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_dynamic_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, b1] +Database: default +Table: list_bucketing_dynamic_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 4 + numRows 984 + rawDataSize 9488 + totalSize 10622 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key, value] +Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=484/value=val_484, [103, val_103]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=103/value=val_103} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +1000 +PREHOOK: query: select count(*) from list_bucketing_dynamic_part +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_dynamic_part +PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 +PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from list_bucketing_dynamic_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 +POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 +#### A masked pattern was here #### +1000 +PREHOOK: query: explain extended +select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + partition values: + ds 2008-04-08 + hr a1 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + numFiles 1 + numRows 16 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 136 + 
serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + totalSize 254 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_dynamic_part + name: default.list_bucketing_dynamic_part + Partition + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + partition values: + ds 2008-04-08 + hr b1 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + numFiles 4 + numRows 984 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 9488 + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + totalSize 10622 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_dynamic_part + name: default.list_bucketing_dynamic_part + Processor Tree: + TableScan + alias: list_bucketing_dynamic_part + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), 'val_484' (type: string), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_dynamic_part +PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 +PREHOOK: Input: 
default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 +#### A masked pattern was here #### +POSTHOOK: query: select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 +POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 +#### A masked pattern was here #### +484 val_484 2008-04-08 b1 +484 val_484 2008-04-08 b1 +PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +484 val_484 2008-04-08 11 +484 val_484 2008-04-08 12 +PREHOOK: query: -- clean up +drop table list_bucketing_dynamic_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@list_bucketing_dynamic_part +PREHOOK: Output: default@list_bucketing_dynamic_part +POSTHOOK: query: -- clean up +drop table list_bucketing_dynamic_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: Output: default@list_bucketing_dynamic_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_8.q.java1.7.out ql/src/test/results/clientpositive/list_bucket_dml_8.q.java1.7.out deleted file mode 100644 index d40a69340226cb15a5a695266eff0674422b518f..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_8.q.java1.7.out +++ /dev/null @@ -1,637 +0,0 @@ -PREHOOK: query: -- list bucketing alter table ... concatenate: --- Use list bucketing DML to generate multiple files in partitions by turning off merge --- dynamic partition. multiple skewed columns. merge.
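(Aside, illustrative only: the if(key % 100 == 0, 'a1', 'b1') projection used throughout this test routes rows whose key is a multiple of 100 to the dynamic partition hr=a1 and everything else to hr=b1; the 16/984 split it produces is exactly the numRows pair recorded in the desc formatted output later in this file. A hedged sketch of how one could verify the split, not part of the golden output:)
-- Sketch only: count rows per computed hr bucket.
select if(key % 100 == 0, 'a1', 'b1') as hr, count(*) as cnt
from srcpart
where ds = '2008-04-08'
group by if(key % 100 == 0, 'a1', 'b1');
-- Per the partition stats recorded in this file: a1 -> 16 rows, b1 -> 984 rows.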
--- The following explains merge example used in this test case --- DML will generate 2 partitions --- ds=2008-04-08/hr=a1 --- ds=2008-04-08/hr=b1 --- without merge, each partition has more files --- ds=2008-04-08/hr=a1 has 2 files --- ds=2008-04-08/hr=b1 has 6 files --- with merge each partition has fewer files --- ds=2008-04-08/hr=a1 has 1 file --- ds=2008-04-08/hr=b1 has 4 files --- The following shows file size and name in each directory --- hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- without merge --- 155 000000_0 --- 155 000001_0 --- with merge --- 254 000000_0 --- hr=b1/key=103/value=val_103: --- without merge --- 99 000000_0 --- 99 000001_0 --- with merge --- 142 000001_0 --- hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- without merge --- 5181 000000_0 --- 5181 000001_0 --- with merge --- 5181 000000_0 --- 5181 000001_0 --- hr=b1/key=484/value=val_484 --- without merge --- 87 000000_0 --- 87 000001_0 --- with merge --- 118 000002_0 - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- JAVA_VERSION_SPECIFIC_OUTPUT - --- create a skewed table -create table list_bucketing_dynamic_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_dynamic_part -POSTHOOK: query: -- list bucketing alter table ... concatenate: --- Use list bucketing DML to generate multiple files in partitions by turning off merge --- dynamic partition. multiple skewed columns. merge. --- The following explains merge example used in this test case --- DML will generate 2 partitions --- ds=2008-04-08/hr=a1 --- ds=2008-04-08/hr=b1 --- without merge, each partition has more files --- ds=2008-04-08/hr=a1 has 2 files --- ds=2008-04-08/hr=b1 has 6 files --- with merge each partition has fewer files --- ds=2008-04-08/hr=a1 has 1 file --- ds=2008-04-08/hr=b1 has 4 files --- The following shows file size and name in each directory --- hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- without merge --- 155 000000_0 --- 155 000001_0 --- with merge --- 254 000000_0 --- hr=b1/key=103/value=val_103: --- without merge --- 99 000000_0 --- 99 000001_0 --- with merge --- 142 000001_0 --- hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- without merge --- 5181 000000_0 --- 5181 000001_0 --- with merge --- 5181 000000_0 --- 5181 000001_0 --- hr=b1/key=484/value=val_484 --- without merge --- 87 000000_0 --- 87 000001_0 --- with merge --- 118 000002_0 - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- JAVA_VERSION_SPECIFIC_OUTPUT - --- create a skewed table -create table list_bucketing_dynamic_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_dynamic_part -PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files.
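(Aside, illustrative only: this golden file implies a q-file that runs the DML with dynamic partitioning enabled and file merging disabled, so each mapper leaves its own small file under every skew directory. A plausible settings block is sketched below; it is an assumption, since the .q source is not part of this hunk:)
set hive.mapred.supports.subdirectories=true;
set mapred.input.dir.recursive=true;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
-- merge off: the "without merge" file counts quoted above (2 and 6 files) come from here
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;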
-explain extended -insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0) = 0.0), 'a1', 'b1') (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - 
serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [srcpart] - /srcpart/ds=2008-04-08/hr=12 [srcpart] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08 -POSTHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -POSTHOOK: type: 
QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=a1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=a1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_dynamic_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_dynamic_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_dynamic_part -ds=2008-04-08/hr=a1 -ds=2008-04-08/hr=b1 -PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, a1] -Database: default -Table: list_bucketing_dynamic_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 2 - numRows 16 - rawDataSize 136 - totalSize 310 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, b1] -Database: default -Table: list_bucketing_dynamic_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 6 - numRows 984 - rawDataSize 9488 - totalSize 10734 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe 
-InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[103, val_103]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=103/value=val_103, [484, val_484]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=484/value=val_484} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: -- concatenate the partition and it will merge files -alter table list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') concatenate -PREHOOK: type: ALTER_PARTITION_MERGE -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -POSTHOOK: query: -- concatenate the partition and it will merge files -alter table list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') concatenate -POSTHOOK: type: ALTER_PARTITION_MERGE -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, b1] -Database: default -Table: list_bucketing_dynamic_part -#### A masked pattern was here #### -Partition Parameters: - numFiles 3 - totalSize 10586 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[103, val_103]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=103/value=val_103, [484, val_484]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=484/value=val_484} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -1000 -PREHOOK: query: select count(*) from list_bucketing_dynamic_part -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 
-#### A masked pattern was here #### -POSTHOOK: query: select count(*) from list_bucketing_dynamic_part -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -#### A masked pattern was here #### -1000 -PREHOOK: query: explain extended -select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr a1 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - numFiles 2 - numRows 16 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 136 - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 310 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - name: default.list_bucketing_dynamic_part - Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr b1 - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - numFiles 3 - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10586 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - 
serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - name: default.list_bucketing_dynamic_part - Processor Tree: - TableScan - alias: list_bucketing_dynamic_part - Statistics: Num rows: 16 Data size: 136 Basic stats: PARTIAL Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), ds (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE - ListSink - -PREHOOK: query: select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -#### A masked pattern was here #### -484 val_484 2008-04-08 b1 -484 val_484 2008-04-08 b1 -PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' order by hr -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' order by hr -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -484 val_484 2008-04-08 11 -484 val_484 2008-04-08 12 -PREHOOK: query: -- clean up -drop table list_bucketing_dynamic_part -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Output: default@list_bucketing_dynamic_part -POSTHOOK: query: -- clean up -drop table list_bucketing_dynamic_part -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Output: default@list_bucketing_dynamic_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_8.q.java1.8.out ql/src/test/results/clientpositive/list_bucket_dml_8.q.java1.8.out deleted file mode 100644 index 9947c1a14d43f0dcd0b45f0700f7bf225b5fe059..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_8.q.java1.8.out +++ /dev/null @@ -1,712 +0,0 @@ -PREHOOK: query: -- list bucketing alter table ... concatenate: --- Use list bucketing DML to generate multiple files in partitions by turning off merge --- dynamic partition. multiple skewed columns. merge.
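(Aside, illustrative only: the "Skewed Value to Truncated Path" mapping recorded later in this file is what lets a point lookup on a skewed value be answered from a single subdirectory. Hedged sketch of the pruned query; hive.optimize.listbucketing is assumed to be the relevant switch, as the .q source is not part of this hunk:)
set hive.optimize.listbucketing=true;
-- With pruning, this predicate only needs .../hr=b1/key=484/value=val_484.
select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484';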
--- The following explains merge example used in this test case --- DML will generate 2 partitions --- ds=2008-04-08/hr=a1 --- ds=2008-04-08/hr=b1 --- without merge, each partition has more files --- ds=2008-04-08/hr=a1 has 2 files --- ds=2008-04-08/hr=b1 has 6 files --- with merge each partition has fewer files --- ds=2008-04-08/hr=a1 has 1 file --- ds=2008-04-08/hr=b1 has 4 files --- The following shows file size and name in each directory --- hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- without merge --- 155 000000_0 --- 155 000001_0 --- with merge --- 254 000000_0 --- hr=b1/key=103/value=val_103: --- without merge --- 99 000000_0 --- 99 000001_0 --- with merge --- 142 000001_0 --- hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- without merge --- 5181 000000_0 --- 5181 000001_0 --- with merge --- 5181 000000_0 --- 5181 000001_0 --- hr=b1/key=484/value=val_484 --- without merge --- 87 000000_0 --- 87 000001_0 --- with merge --- 118 000002_0 - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- JAVA_VERSION_SPECIFIC_OUTPUT - --- create a skewed table -create table list_bucketing_dynamic_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_dynamic_part -POSTHOOK: query: -- list bucketing alter table ... concatenate: --- Use list bucketing DML to generate multiple files in partitions by turning off merge --- dynamic partition. multiple skewed columns. merge. --- The following explains merge example used in this test case --- DML will generate 2 partitions --- ds=2008-04-08/hr=a1 --- ds=2008-04-08/hr=b1 --- without merge, each partition has more files --- ds=2008-04-08/hr=a1 has 2 files --- ds=2008-04-08/hr=b1 has 6 files --- with merge each partition has fewer files --- ds=2008-04-08/hr=a1 has 1 file --- ds=2008-04-08/hr=b1 has 4 files --- The following shows file size and name in each directory --- hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- without merge --- 155 000000_0 --- 155 000001_0 --- with merge --- 254 000000_0 --- hr=b1/key=103/value=val_103: --- without merge --- 99 000000_0 --- 99 000001_0 --- with merge --- 142 000001_0 --- hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- without merge --- 5181 000000_0 --- 5181 000001_0 --- with merge --- 5181 000000_0 --- 5181 000001_0 --- hr=b1/key=484/value=val_484 --- without merge --- 87 000000_0 --- 87 000001_0 --- with merge --- 118 000002_0 - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- JAVA_VERSION_SPECIFIC_OUTPUT - --- create a skewed table -create table list_bucketing_dynamic_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_dynamic_part -PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files.
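(Aside, illustrative only: the per-directory file sizes and names quoted in the comment above can be inspected from the Hive CLI with a dfs command; the warehouse path below is a hypothetical example, not taken from this diff:)
-- Hypothetical warehouse location; substitute the actual one.
dfs -lsr /user/hive/warehouse/list_bucketing_dynamic_part/ds=2008-04-08;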
-explain extended -insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - srcpart - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - list_bucketing_dynamic_part - TOK_PARTSPEC - TOK_PARTVAL - ds - '2008-04-08' - TOK_PARTVAL - hr - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - value - TOK_SELEXPR - TOK_FUNCTION - if - == - % - TOK_TABLE_OR_COL - key - 100 - 0 - 'a1' - 'b1' - TOK_WHERE - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0) = 0.0), 'a1', 'b1') (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:srcpart] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: 
default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08 -POSTHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) -select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=a1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=a1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_dynamic_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_dynamic_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_dynamic_part -ds=2008-04-08/hr=a1 -ds=2008-04-08/hr=b1 -PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, a1] -Database: default -Table: list_bucketing_dynamic_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 2 - numRows 16 - rawDataSize 136 - totalSize 310 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: 
[2008-04-08, b1] -Database: default -Table: list_bucketing_dynamic_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 6 - numRows 984 - rawDataSize 9488 - totalSize 10734 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=484/value=val_484, [103, val_103]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=103/value=val_103} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: -- concatenate the partition and it will merge files -alter table list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') concatenate -PREHOOK: type: ALTER_PARTITION_MERGE -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -POSTHOOK: query: -- concatenate the partition and it will merge files -alter table list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') concatenate -POSTHOOK: type: ALTER_PARTITION_MERGE -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, b1] -Database: default -Table: list_bucketing_dynamic_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 3 - numRows 0 - rawDataSize 0 - totalSize 10586 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=484/value=val_484, [103, val_103]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=103/value=val_103} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -1000 -PREHOOK: query: select count(*) from list_bucketing_dynamic_part -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from list_bucketing_dynamic_part -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -#### A masked pattern was here #### -1000 -PREHOOK: query: explain extended -select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - list_bucketing_dynamic_part - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - and - = - TOK_TABLE_OR_COL - key - '484' - = - TOK_TABLE_OR_COL - value - 'val_484' - - -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr a1 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - numFiles 2 - numRows 16 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 136 - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 310 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - name: default.list_bucketing_dynamic_part - Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr b1 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - numFiles 3 - numRows 0 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 
0 - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10586 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_dynamic_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_dynamic_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_dynamic_part - name: default.list_bucketing_dynamic_part - Processor Tree: - TableScan - alias: list_bucketing_dynamic_part - Statistics: Num rows: 16 Data size: 136 Basic stats: PARTIAL Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), ds (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE - ListSink - -PREHOOK: query: select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 -POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 -#### A masked pattern was here #### -484 val_484 2008-04-08 b1 -484 val_484 2008-04-08 b1 -PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' order by hr -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' order by hr -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -484 val_484 2008-04-08 11 -484 val_484 2008-04-08 12 -PREHOOK: query: -- clean up -drop table list_bucketing_dynamic_part -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_dynamic_part -PREHOOK: Output: default@list_bucketing_dynamic_part -POSTHOOK: query: -- clean up -drop table list_bucketing_dynamic_part -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_dynamic_part -POSTHOOK: Output: default@list_bucketing_dynamic_part diff --git 
ql/src/test/results/clientpositive/list_bucket_dml_8.q.out ql/src/test/results/clientpositive/list_bucket_dml_8.q.out new file mode 100644 index 0000000000000000000000000000000000000000..ce62e0b2e947e69109ab63e3cd9693c5cef6d0b9 --- /dev/null +++ ql/src/test/results/clientpositive/list_bucket_dml_8.q.out @@ -0,0 +1,635 @@ +PREHOOK: query: -- list bucketing alter table ... concatenate: +-- Use list bucketing DML to generate multiple files in partitions by turning off merge +-- dynamic partition. multiple skewed columns. merge. +-- The following explains merge example used in this test case +-- DML will generate 2 partitions +-- ds=2008-04-08/hr=a1 +-- ds=2008-04-08/hr=b1 +-- without merge, each partition has more files +-- ds=2008-04-08/hr=a1 has 2 files +-- ds=2008-04-08/hr=b1 has 6 files +-- with merge each partition has fewer files +-- ds=2008-04-08/hr=a1 has 1 file +-- ds=2008-04-08/hr=b1 has 4 files +-- The following shows file size and name in each directory +-- hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- without merge +-- 155 000000_0 +-- 155 000001_0 +-- with merge +-- 254 000000_0 +-- hr=b1/key=103/value=val_103: +-- without merge +-- 99 000000_0 +-- 99 000001_0 +-- with merge +-- 142 000001_0 +-- hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- without merge +-- 5181 000000_0 +-- 5181 000001_0 +-- with merge +-- 5181 000000_0 +-- 5181 000001_0 +-- hr=b1/key=484/value=val_484 +-- without merge +-- 87 000000_0 +-- 87 000001_0 +-- with merge +-- 118 000002_0 + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) + +-- create a skewed table +create table list_bucketing_dynamic_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_dynamic_part +POSTHOOK: query: -- list bucketing alter table ... concatenate: +-- Use list bucketing DML to generate multiple files in partitions by turning off merge +-- dynamic partition. multiple skewed columns. merge.
+-- The following explains merge example used in this test case +-- DML will generate 2 partitions +-- ds=2008-04-08/hr=a1 +-- ds=2008-04-08/hr=b1 +-- without merge, each partition has more files +-- ds=2008-04-08/hr=a1 has 2 files +-- ds=2008-04-08/hr=b1 has 6 files +-- with merge each partition has fewer files +-- ds=2008-04-08/hr=a1 has 1 file +-- ds=2008-04-08/hr=b1 has 4 files +-- The following shows file size and name in each directory +-- hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- without merge +-- 155 000000_0 +-- 155 000001_0 +-- with merge +-- 254 000000_0 +-- hr=b1/key=103/value=val_103: +-- without merge +-- 99 000000_0 +-- 99 000001_0 +-- with merge +-- 142 000001_0 +-- hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- without merge +-- 5181 000000_0 +-- 5181 000001_0 +-- with merge +-- 5181 000000_0 +-- 5181 000001_0 +-- hr=b1/key=484/value=val_484 +-- without merge +-- 87 000000_0 +-- 87 000001_0 +-- with merge +-- 118 000002_0 + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) + +-- create a skewed table +create table list_bucketing_dynamic_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_dynamic_part +PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) +select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files.
+explain extended +insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) +select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0) = 0.0), 'a1', 'b1') (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/ + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_dynamic_part + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A 
masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_dynamic_part + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) +select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08 +POSTHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) +select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 +POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 +POSTHOOK: 
Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=a1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=a1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- check DML result +show partitions list_bucketing_dynamic_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: query: -- check DML result +show partitions list_bucketing_dynamic_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_dynamic_part +ds=2008-04-08/hr=a1 +ds=2008-04-08/hr=b1 +PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_dynamic_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, a1] +Database: default +Table: list_bucketing_dynamic_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 2 + numRows 16 + rawDataSize 136 + totalSize 310 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key, value] +Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_dynamic_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, b1] +Database: default +Table: list_bucketing_dynamic_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 6 + numRows 984 + rawDataSize 9488 + totalSize 10734 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key, value] +Skewed Values: [[484, val_484], [51, val_14], [103, 
val_103]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=484/value=val_484, [103, val_103]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=103/value=val_103} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- concatenate the partition and it will merge files +alter table list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') concatenate +PREHOOK: type: ALTER_PARTITION_MERGE +PREHOOK: Input: default@list_bucketing_dynamic_part +PREHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 +POSTHOOK: query: -- concatenate the partition and it will merge files +alter table list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') concatenate +POSTHOOK: type: ALTER_PARTITION_MERGE +POSTHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: Output: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 +PREHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: query: desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_dynamic_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, b1] +Database: default +Table: list_bucketing_dynamic_part +#### A masked pattern was here #### +Partition Parameters: + numFiles 3 + totalSize 10586 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key, value] +Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=484/value=val_484, [103, val_103]=/list_bucketing_dynamic_part/ds=2008-04-08/hr=b1/key=103/value=val_103} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +1000 +PREHOOK: query: select count(*) from list_bucketing_dynamic_part +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_dynamic_part +PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 +PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from list_bucketing_dynamic_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 +POSTHOOK: Input: 
default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 +#### A masked pattern was here #### +1000 +PREHOOK: query: explain extended +select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + partition values: + ds 2008-04-08 + hr a1 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + numFiles 2 + numRows 16 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 136 + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + totalSize 310 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_dynamic_part + name: default.list_bucketing_dynamic_part + Partition + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + partition values: + ds 2008-04-08 + hr b1 + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + numFiles 3 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + totalSize 10586 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_dynamic_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_dynamic_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_dynamic_part + name: 
default.list_bucketing_dynamic_part + Processor Tree: + TableScan + alias: list_bucketing_dynamic_part + Statistics: Num rows: 16 Data size: 136 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), 'val_484' (type: string), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_dynamic_part +PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 +PREHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 +#### A masked pattern was here #### +POSTHOOK: query: select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=a1 +POSTHOOK: Input: default@list_bucketing_dynamic_part@ds=2008-04-08/hr=b1 +#### A masked pattern was here #### +484 val_484 2008-04-08 b1 +484 val_484 2008-04-08 b1 +PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' order by hr +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' order by hr +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +484 val_484 2008-04-08 11 +484 val_484 2008-04-08 12 +PREHOOK: query: -- clean up +drop table list_bucketing_dynamic_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@list_bucketing_dynamic_part +PREHOOK: Output: default@list_bucketing_dynamic_part +POSTHOOK: query: -- clean up +drop table list_bucketing_dynamic_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@list_bucketing_dynamic_part +POSTHOOK: Output: default@list_bucketing_dynamic_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_9.q.java1.7.out ql/src/test/results/clientpositive/list_bucket_dml_9.q.java1.7.out deleted file mode 100644 index 752ea4e47ee7832b181814c00018de327cce2dea..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_9.q.java1.7.out +++ /dev/null @@ -1,813 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. merge. 
--- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103: --- 99 000000_0 --- 99 000001_0 --- after merge --- 142 000000_0 --- ds=2008-04-08/hr=11/key=484: --- 87 000000_0 --- 87 000001_0 --- after merge --- 118 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key) on ('484','103') - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. merge. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103: --- 99 000000_0 --- 99 000001_0 --- after merge --- 142 000000_0 --- ds=2008-04-08/hr=11/key=484: --- 87 000000_0 --- 87 000001_0 --- after merge --- 118 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key) on ('484','103') - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_static_part -PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
-explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [srcpart] - /srcpart/ds=2008-04-08/hr=12 [srcpart] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_static_part 
PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_static_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_static_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_static_part -ds=2008-04-08/hr=11 -PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_static_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_static_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 6 - numRows 1000 - rawDataSize 9624 - totalSize 10898 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key] -Skewed Values: [[484], [103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484, [103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: -- list bucketing DML with merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML with merge. use bucketize to generate a few small files. 
-explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: 
default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [srcpart] - /srcpart/ds=2008-04-08/hr=12 [srcpart] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 
-POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_static_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_static_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_static_part -ds=2008-04-08/hr=11 -PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_static_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_static_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 - numRows 1000 - rawDataSize 9624 - totalSize 10786 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key] -Skewed Values: [[484], [103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484, [103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -1000 -PREHOOK: query: select count(*) from list_bucketing_static_part -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from list_bucketing_static_part -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -1000 -PREHOOK: query: explain extended -select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = 
'484' and value = 'val_484' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - numFiles 4 - numRows 1000 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 9624 - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10786 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - Processor Tree: - TableScan - alias: list_bucketing_static_part - Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - ListSink - -PREHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -484 val_484 2008-04-08 11 -484 val_484 2008-04-08 11 -PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: 
Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -484 val_484 2008-04-08 11 -484 val_484 2008-04-08 12 -PREHOOK: query: -- clean up -drop table list_bucketing_static_part -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- clean up -drop table list_bucketing_static_part -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Output: default@list_bucketing_static_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_9.q.java1.8.out ql/src/test/results/clientpositive/list_bucket_dml_9.q.java1.8.out deleted file mode 100644 index 599d3b000a000c4af21b178a8806ae62c1c258d9..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/list_bucket_dml_9.q.java1.8.out +++ /dev/null @@ -1,915 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. merge. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103: --- 99 000000_0 --- 99 000001_0 --- after merge --- 142 000000_0 --- ds=2008-04-08/hr=11/key=484: --- 87 000000_0 --- 87 000001_0 --- after merge --- 118 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key) on ('484','103') - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. merge. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103: --- 99 000000_0 --- 99 000001_0 --- after merge --- 142 000000_0 --- ds=2008-04-08/hr=11/key=484: --- 87 000000_0 --- 87 000001_0 --- after merge --- 118 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key) on ('484','103') - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_static_part -PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
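For context, the statements this golden file replays are quoted verbatim in its PREHOOK/POSTHOOK blocks; a minimal standalone reproduction of the scenario (assuming the stock srcpart test table is already loaded) is just the skewed-table DDL plus the static-partition insert:

    create table list_bucketing_static_part (key String, value String)
        partitioned by (ds String, hr String)
        skewed by (key) on ('484','103')
        stored as DIRECTORIES
        STORED AS RCFILE;

    insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
    select key, value from srcpart where ds = '2008-04-08';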
-explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - srcpart - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - list_bucketing_static_part - TOK_PARTSPEC - TOK_PARTVAL - ds - '2008-04-08' - TOK_PARTVAL - hr - '11' - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - value - TOK_WHERE - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - 
serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [srcpart] - /srcpart/ds=2008-04-08/hr=12 [srcpart] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: 
default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_static_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_static_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_static_part -ds=2008-04-08/hr=11 -PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_static_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_static_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 6 - numRows 1000 - rawDataSize 9624 - totalSize 10898 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key] -Skewed Values: [[484], [103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103, [484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: -- list bucketing DML with merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML with merge. use bucketize to generate a few small files. 
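The merge variant below exercises the conditional merge stages (Stage-7 choosing between a plain move and the RCFile Merge Operator). The .q file itself is not part of this hunk, so the exact toggles are an assumption, but the stock properties that enable small-file merging for this kind of plan are:

    set hive.merge.mapfiles=true;     -- assumed toggle: merge small outputs of map-only jobs
    set hive.merge.mapredfiles=true;  -- assumed toggle: merge small outputs of map-reduce jobs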
-explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - srcpart - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - list_bucketing_static_part - TOK_PARTSPEC - TOK_PARTVAL - ds - '2008-04-08' - TOK_PARTVAL - hr - '11' - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - value - TOK_WHERE - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 - Stage-5 - Stage-6 depends on stages: Stage-5 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - 
partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [srcpart] - /srcpart/ds=2008-04-08/hr=12 [srcpart] - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-4 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-3 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern 
was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-5 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') 
-select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_static_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_static_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_static_part -ds=2008-04-08/hr=11 -PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_static_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_static_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 4 - numRows 1000 - rawDataSize 9624 - totalSize 10786 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key] -Skewed Values: [[484], [103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103, [484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -1000 -PREHOOK: query: select count(*) from list_bucketing_static_part -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from list_bucketing_static_part -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -1000 -PREHOOK: query: explain extended -select * from list_bucketing_static_part where ds = '2008-04-08' 
and hr = '11' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - list_bucketing_static_part - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - and - and - and - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - = - TOK_TABLE_OR_COL - hr - '11' - = - TOK_TABLE_OR_COL - key - '484' - = - TOK_TABLE_OR_COL - value - 'val_484' - - -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - numFiles 4 - numRows 1000 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 9624 - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10786 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - Processor Tree: - TableScan - alias: list_bucketing_static_part - Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - ListSink - -PREHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: 
default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -484 val_484 2008-04-08 11 -484 val_484 2008-04-08 11 -PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -484 val_484 2008-04-08 11 -484 val_484 2008-04-08 12 -PREHOOK: query: -- clean up -drop table list_bucketing_static_part -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- clean up -drop table list_bucketing_static_part -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Output: default@list_bucketing_static_part diff --git ql/src/test/results/clientpositive/list_bucket_dml_9.q.out ql/src/test/results/clientpositive/list_bucket_dml_9.q.out new file mode 100644 index 0000000000000000000000000000000000000000..81f3af37b9841c2f622c8cb8ba743c24e76ea356 --- /dev/null +++ ql/src/test/results/clientpositive/list_bucket_dml_9.q.out @@ -0,0 +1,811 @@ +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS + +-- list bucketing DML: static partition. multiple skewed columns. merge. +-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103: +-- 99 000000_0 +-- 99 000001_0 +-- after merge +-- 142 000000_0 +-- ds=2008-04-08/hr=11/key=484: +-- 87 000000_0 +-- 87 000001_0 +-- after merge +-- 118 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key) on ('484','103') + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_static_part +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- SORT_QUERY_RESULTS + +-- list bucketing DML: static partition. multiple skewed columns. merge. +-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103: +-- 99 000000_0 +-- 99 000001_0 +-- after merge +-- 142 000000_0 +-- ds=2008-04-08/hr=11/key=484: +-- 87 000000_0 +-- 87 000001_0 +-- after merge +-- 118 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key) on ('484','103') + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_static_part +PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
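Further down in this hunk, the regenerated file verifies the DML result with the same two metadata queries as the deleted per-JDK outputs:

    show partitions list_bucketing_static_part;
    desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11');

The desc formatted section is where list bucketing is visible: Stored As SubDirectories: Yes, Skewed Columns [key], and the Skewed Value to Truncated Path map routing key=484 and key=103 rows into their own subdirectories.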
+explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=11/ + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 11 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_static_part 
PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- check DML result +show partitions list_bucketing_static_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: -- check DML result +show partitions list_bucketing_static_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_static_part +ds=2008-04-08/hr=11 +PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_static_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: list_bucketing_static_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 6 + numRows 1000 + rawDataSize 9624 + totalSize 10898 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key] +Skewed Values: [[484], [103]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103, [484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- list bucketing DML with merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML with merge. use bucketize to generate a few small files. 
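The payoff of the skew directories shows up in the point-lookup checks recorded elsewhere in this patch: a query pinned to a skewed key compiles down to a single fetch stage (Stage-0 Fetch Operator, no MapReduce job), as its explain extended output records:

    explain extended
    select * from list_bucketing_static_part
    where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484';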
+explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=11/ + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 11 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-5 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 
+POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- check DML result +show partitions list_bucketing_static_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: -- check DML result +show partitions list_bucketing_static_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_static_part +ds=2008-04-08/hr=11 +PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_static_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: list_bucketing_static_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 4 + numRows 1000 + rawDataSize 9624 + totalSize 10786 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key] +Skewed Values: [[484], [103]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103, [484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +1000 +PREHOOK: query: select count(*) from list_bucketing_static_part +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_static_part +PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from list_bucketing_static_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_static_part +POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +1000 +PREHOOK: query: explain extended +select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = 
'484' and value = 'val_484' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + numFiles 4 + numRows 1000 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 9624 + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + totalSize 10786 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + name: default.list_bucketing_static_part + Processor Tree: + TableScan + alias: list_bucketing_static_part + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = '484') and (value = 'val_484')) (type: boolean) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' +PREHOOK: type: QUERY +PREHOOK: Input: default@list_bucketing_static_part +PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@list_bucketing_static_part +POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +484 val_484 2008-04-08 11 +484 val_484 2008-04-08 11 +PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: 
Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +484 val_484 2008-04-08 11 +484 val_484 2008-04-08 12 +PREHOOK: query: -- clean up +drop table list_bucketing_static_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@list_bucketing_static_part +PREHOOK: Output: default@list_bucketing_static_part +POSTHOOK: query: -- clean up +drop table list_bucketing_static_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@list_bucketing_static_part +POSTHOOK: Output: default@list_bucketing_static_part diff --git ql/src/test/results/clientpositive/llap/join0.q.java1.7.out ql/src/test/results/clientpositive/llap/join0.q.java1.7.out deleted file mode 100644 index 5651839c647a3de2ca29ba0c5fc5b93cb9cf507d..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/llap/join0.q.java1.7.out +++ /dev/null @@ -1,242 +0,0 @@ -Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT --- SORT_QUERY_RESULTS - -EXPLAIN -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT --- SORT_QUERY_RESULTS - -EXPLAIN -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: llap - Map 4 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: llap - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE 
Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: uber - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN FORMATTED -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN FORMATTED -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -#### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 2 val_2 -0 val_0 2 val_2 -0 val_0 2 val_2 -0 val_0 4 val_4 -0 val_0 4 val_4 -0 val_0 4 val_4 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 8 val_8 -0 val_0 8 val_8 -0 val_0 8 val_8 -0 val_0 9 val_9 -0 val_0 9 val_9 -0 val_0 9 val_9 -2 val_2 0 val_0 -2 val_2 0 val_0 -2 val_2 0 val_0 -2 val_2 2 val_2 -2 val_2 4 val_4 -2 val_2 5 val_5 -2 val_2 5 val_5 -2 val_2 5 val_5 -2 val_2 8 val_8 -2 val_2 9 val_9 -4 val_4 0 val_0 -4 val_4 0 val_0 -4 val_4 0 val_0 -4 val_4 2 val_2 -4 val_4 4 val_4 -4 val_4 5 val_5 -4 val_4 5 val_5 -4 val_4 5 val_5 -4 val_4 8 val_8 -4 val_4 9 val_9 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 2 val_2 -5 val_5 2 val_2 -5 val_5 2 val_2 -5 val_5 4 val_4 -5 val_5 4 val_4 -5 val_5 4 val_4 -5 val_5 5 val_5 -5 val_5 5 val_5 
-5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 8 val_8 -5 val_5 8 val_8 -5 val_5 8 val_8 -5 val_5 9 val_9 -5 val_5 9 val_9 -5 val_5 9 val_9 -8 val_8 0 val_0 -8 val_8 0 val_0 -8 val_8 0 val_0 -8 val_8 2 val_2 -8 val_8 4 val_4 -8 val_8 5 val_5 -8 val_8 5 val_5 -8 val_8 5 val_5 -8 val_8 8 val_8 -8 val_8 9 val_9 -9 val_9 0 val_0 -9 val_9 0 val_0 -9 val_9 0 val_0 -9 val_9 2 val_2 -9 val_9 4 val_4 -9 val_9 5 val_5 -9 val_9 5 val_5 -9 val_9 5 val_5 -9 val_9 8 val_8 -9 val_9 9 val_9 diff --git ql/src/test/results/clientpositive/llap/join0.q.java1.8.out ql/src/test/results/clientpositive/llap/join0.q.java1.8.out deleted file mode 100644 index 5651839c647a3de2ca29ba0c5fc5b93cb9cf507d..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/llap/join0.q.java1.8.out +++ /dev/null @@ -1,242 +0,0 @@ -Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT --- SORT_QUERY_RESULTS - -EXPLAIN -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT --- SORT_QUERY_RESULTS - -EXPLAIN -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: llap - Map 4 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: llap - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce 
Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: uber - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN FORMATTED -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN FORMATTED -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -#### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 2 val_2 -0 val_0 2 val_2 -0 val_0 2 val_2 -0 val_0 4 val_4 -0 val_0 4 val_4 -0 val_0 4 val_4 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 8 val_8 -0 val_0 8 val_8 -0 val_0 8 val_8 -0 val_0 9 val_9 -0 val_0 9 val_9 -0 val_0 9 val_9 -2 val_2 0 val_0 -2 val_2 0 val_0 -2 val_2 0 val_0 -2 val_2 2 val_2 -2 val_2 4 val_4 -2 val_2 5 val_5 -2 val_2 5 val_5 -2 val_2 5 val_5 -2 val_2 8 val_8 -2 val_2 9 val_9 -4 val_4 0 val_0 -4 val_4 0 val_0 -4 val_4 0 val_0 -4 val_4 2 val_2 -4 val_4 4 val_4 -4 val_4 5 val_5 -4 val_4 5 val_5 -4 val_4 5 val_5 -4 val_4 8 val_8 -4 val_4 9 val_9 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 2 val_2 -5 val_5 2 val_2 -5 val_5 2 val_2 -5 val_5 4 val_4 -5 val_5 4 val_4 -5 val_5 4 val_4 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 
val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 8 val_8 -5 val_5 8 val_8 -5 val_5 8 val_8 -5 val_5 9 val_9 -5 val_5 9 val_9 -5 val_5 9 val_9 -8 val_8 0 val_0 -8 val_8 0 val_0 -8 val_8 0 val_0 -8 val_8 2 val_2 -8 val_8 4 val_4 -8 val_8 5 val_5 -8 val_8 5 val_5 -8 val_8 5 val_5 -8 val_8 8 val_8 -8 val_8 9 val_9 -9 val_9 0 val_0 -9 val_9 0 val_0 -9 val_9 0 val_0 -9 val_9 2 val_2 -9 val_9 4 val_4 -9 val_9 5 val_5 -9 val_9 5 val_5 -9 val_9 5 val_5 -9 val_9 8 val_8 -9 val_9 9 val_9 diff --git ql/src/test/results/clientpositive/llap/join0.q.out ql/src/test/results/clientpositive/llap/join0.q.out new file mode 100644 index 0000000000000000000000000000000000000000..f177afc5cc8c55fc4ae7568864017c97a83a9491 --- /dev/null +++ ql/src/test/results/clientpositive/llap/join0.q.out @@ -0,0 +1,243 @@ +Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +PREHOOK: type: QUERY +POSTHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: EXPLAIN FORMATTED +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN FORMATTED +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +POSTHOOK: type: QUERY +#### A masked pattern was here #### +Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 2 val_2 +0 val_0 2 val_2 +0 val_0 2 val_2 +0 val_0 4 val_4 +0 val_0 4 val_4 +0 val_0 4 val_4 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 8 val_8 +0 val_0 8 val_8 +0 val_0 8 val_8 +0 val_0 9 val_9 +0 val_0 9 val_9 +0 val_0 9 val_9 +2 val_2 0 val_0 +2 val_2 0 val_0 +2 val_2 0 val_0 +2 val_2 2 val_2 +2 val_2 4 val_4 +2 val_2 5 val_5 +2 val_2 5 val_5 +2 val_2 5 val_5 +2 val_2 8 val_8 +2 val_2 9 val_9 +4 val_4 0 val_0 +4 val_4 0 val_0 +4 val_4 0 val_0 +4 val_4 2 val_2 +4 val_4 4 val_4 +4 val_4 5 val_5 +4 val_4 5 val_5 +4 val_4 5 val_5 +4 val_4 8 val_8 +4 val_4 9 val_9 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 2 val_2 +5 val_5 2 val_2 +5 val_5 2 val_2 +5 val_5 4 val_4 +5 val_5 4 val_4 +5 val_5 4 val_4 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 
+5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 8 val_8 +5 val_5 8 val_8 +5 val_5 8 val_8 +5 val_5 9 val_9 +5 val_5 9 val_9 +5 val_5 9 val_9 +8 val_8 0 val_0 +8 val_8 0 val_0 +8 val_8 0 val_0 +8 val_8 2 val_2 +8 val_8 4 val_4 +8 val_8 5 val_5 +8 val_8 5 val_5 +8 val_8 5 val_5 +8 val_8 8 val_8 +8 val_8 9 val_9 +9 val_9 0 val_0 +9 val_9 0 val_0 +9 val_9 0 val_0 +9 val_9 2 val_2 +9 val_9 4 val_4 +9 val_9 5 val_5 +9 val_9 5 val_5 +9 val_9 5 val_5 +9 val_9 8 val_8 +9 val_9 9 val_9 diff --git ql/src/test/results/clientpositive/llap/vector_cast_constant.q.java1.7.out ql/src/test/results/clientpositive/llap/vector_cast_constant.q.java1.7.out deleted file mode 100644 index 22b5d934876ae98bd5ccccf48e55c6de9da9403c..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/llap/vector_cast_constant.q.java1.7.out +++ /dev/null @@ -1,217 +0,0 @@ -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE over1korc -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE over1korc -POSTHOOK: type: DROPTABLE -PREHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1k -POSTHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over1k -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@over1k -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@over1k -PREHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1korc -POSTHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over1korc -PREHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -PREHOOK: type: QUERY -PREHOOK: Input: default@over1k -PREHOOK: Output: default@over1korc -POSTHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1k -POSTHOOK: Output: default@over1korc -POSTHOOK: Lineage: over1korc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ] -POSTHOOK: Lineage: over1korc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ] -POSTHOOK: Lineage: over1korc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ] -POSTHOOK: 
Lineage: over1korc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] -POSTHOOK: Lineage: over1korc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] -POSTHOOK: Lineage: over1korc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ] -POSTHOOK: Lineage: over1korc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ] -POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ] -POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] -POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] -POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(50), avg(50.0), avg(50) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Execution mode: vectorized, llap - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) - Reducer 3 - Execution mode: vectorized, uber - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - File Output 
Operator - compressed: false - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@over1korc -#### A masked pattern was here #### -POSTHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1korc -#### A masked pattern was here #### -65536 50.0 50.0 50 -65537 50.0 50.0 50 -65538 50.0 50.0 50 -65539 50.0 50.0 50 -65540 50.0 50.0 50 -65541 50.0 50.0 50 -65542 50.0 50.0 50 -65543 50.0 50.0 50 -65544 50.0 50.0 50 -65545 50.0 50.0 50 diff --git ql/src/test/results/clientpositive/llap/vector_cast_constant.q.java1.8.out ql/src/test/results/clientpositive/llap/vector_cast_constant.q.java1.8.out deleted file mode 100644 index 22b5d934876ae98bd5ccccf48e55c6de9da9403c..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/llap/vector_cast_constant.q.java1.8.out +++ /dev/null @@ -1,217 +0,0 @@ -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE over1korc -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE over1korc -POSTHOOK: type: DROPTABLE -PREHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1k -POSTHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over1k -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@over1k -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@over1k -PREHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1korc -POSTHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -POSTHOOK: type: CREATETABLE 
-POSTHOOK: Output: database:default -POSTHOOK: Output: default@over1korc -PREHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -PREHOOK: type: QUERY -PREHOOK: Input: default@over1k -PREHOOK: Output: default@over1korc -POSTHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1k -POSTHOOK: Output: default@over1korc -POSTHOOK: Lineage: over1korc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ] -POSTHOOK: Lineage: over1korc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ] -POSTHOOK: Lineage: over1korc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ] -POSTHOOK: Lineage: over1korc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] -POSTHOOK: Lineage: over1korc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] -POSTHOOK: Lineage: over1korc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ] -POSTHOOK: Lineage: over1korc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ] -POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ] -POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] -POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] -POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(50), avg(50.0), avg(50) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Execution mode: vectorized, llap - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key 
expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) - Reducer 3 - Execution mode: vectorized, uber - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@over1korc -#### A masked pattern was here #### -POSTHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1korc -#### A masked pattern was here #### -65536 50.0 50.0 50 -65537 50.0 50.0 50 -65538 50.0 50.0 50 -65539 50.0 50.0 50 -65540 50.0 50.0 50 -65541 50.0 50.0 50 -65542 50.0 50.0 50 -65543 50.0 50.0 50 -65544 50.0 50.0 50 -65545 50.0 50.0 50 diff --git ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out new file mode 100644 index 0000000000000000000000000000000000000000..fc8eb1c16815d920f78766f35cfe2f9a43153d0c --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out @@ -0,0 +1,216 @@ +PREHOOK: query: DROP TABLE over1k +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE over1k +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE over1korc +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE over1korc +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE over1k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k +POSTHOOK: query: -- data setup +CREATE TABLE over1k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over1k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k +POSTHOOK: type: 
LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over1k +PREHOOK: query: CREATE TABLE over1korc(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1korc +POSTHOOK: query: CREATE TABLE over1korc(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1korc +PREHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k +PREHOOK: Output: default@over1korc +POSTHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k +POSTHOOK: Output: default@over1korc +POSTHOOK: Lineage: over1korc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: over1korc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: over1korc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: over1korc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: over1korc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Lineage: over1korc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: over1korc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] +PREHOOK: query: EXPLAIN SELECT + i, + AVG(CAST(50 AS INT)) AS `avg_int_ok`, + AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, + AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + i, + AVG(CAST(50 AS INT)) AS `avg_int_ok`, + AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, + AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1korc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(50), avg(50.0), avg(50) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + i, + AVG(CAST(50 AS INT)) AS `avg_int_ok`, + AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, + AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1korc +#### A masked pattern was here #### +POSTHOOK: query: SELECT + i, + AVG(CAST(50 AS INT)) AS `avg_int_ok`, + AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, + AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1korc +#### A masked pattern was here #### +65536 50.0 50.0 50.0000 +65537 50.0 50.0 50.0000 +65538 50.0 50.0 50.0000 +65539 50.0 50.0 50.0000 +65540 50.0 50.0 50.0000 +65541 50.0 50.0 50.0000 +65542 50.0 50.0 50.0000 +65543 50.0 50.0 50.0000 +65544 50.0 50.0 50.0000 +65545 50.0 50.0 50.0000 diff --git ql/src/test/results/clientpositive/outer_join_ppr.q.java1.7.out ql/src/test/results/clientpositive/outer_join_ppr.q.java1.7.out deleted file mode 100644 index 1312e5388f1bc82b9b8cf13a198355fd1c24897b..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/outer_join_ppr.q.java1.7.out +++ /dev/null @@ -1,685 +0,0 @@ -PREHOOK: query: -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - -EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -PREHOOK: type: QUERY -POSTHOOK: query: -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - -EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, 
b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string) - auto parallelism: false - TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string), _col2 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE 
{"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl 
struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /src [$hdt$_0:a] - /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:b] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:b] - /srcpart/ds=2008-04-09/hr=11 [$hdt$_1:b] - /srcpart/ds=2008-04-09/hr=12 [$hdt$_1:b] - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - filter mappings: - 1 [0, 1] - filter predicates: - 0 - 1 {(VALUE._col1 = '2008-04-08')} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0) and (UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num 
rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -#### A masked pattern was here #### -17 val_17 17 val_17 -17 val_17 17 val_17 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -19 val_19 19 val_19 -19 val_19 19 val_19 -PREHOOK: query: EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 
(type: string) - auto parallelism: false - TableScan - alias: b - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /src [$hdt$_0:a] - /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:b] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:b] - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -17 val_17 17 val_17 -17 val_17 17 val_17 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -19 val_19 19 val_19 -19 val_19 19 val_19 diff --git ql/src/test/results/clientpositive/outer_join_ppr.q.java1.8.out ql/src/test/results/clientpositive/outer_join_ppr.q.java1.8.out deleted file mode 100644 index b9c1a66c1758b5207ba0e0461a520998981b5788..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/outer_join_ppr.q.java1.8.out +++ /dev/null @@ -1,855 +0,0 @@ -PREHOOK: query: -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - -EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -PREHOOK: type: QUERY -POSTHOOK: query: -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - -EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_FULLOUTERJOIN - TOK_TABREF - TOK_TABNAME - src - a - TOK_TABREF - TOK_TABNAME - srcpart - b - AND - = - . - TOK_TABLE_OR_COL - a - key - . - TOK_TABLE_OR_COL - b - key - = - . - TOK_TABLE_OR_COL - b - ds - '2008-04-08' - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - . - TOK_TABLE_OR_COL - a - key - TOK_SELEXPR - . - TOK_TABLE_OR_COL - a - value - TOK_SELEXPR - . - TOK_TABLE_OR_COL - b - key - TOK_SELEXPR - . - TOK_TABLE_OR_COL - b - value - TOK_WHERE - AND - AND - AND - > - . - TOK_TABLE_OR_COL - a - key - 10 - < - . - TOK_TABLE_OR_COL - a - key - 20 - > - . - TOK_TABLE_OR_COL - b - key - 15 - < - . 
- TOK_TABLE_OR_COL - b - key - 25 - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string) - auto parallelism: false - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - 
serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 
'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /src [$hdt$_0:$hdt$_1:a] - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:b] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:b] - /srcpart/ds=2008-04-09/hr=11 [$hdt$_0:$hdt$_0:b] - /srcpart/ds=2008-04-09/hr=12 [$hdt$_0:$hdt$_0:b] - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - filter mappings: - 0 [1, 1] - filter predicates: - 0 {(VALUE._col1 = '2008-04-08')} - 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) and (UDFToDouble(_col0) > 15.0)) and (UDFToDouble(_col0) < 25.0)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -#### A masked pattern was here #### -17 val_17 17 val_17 -17 val_17 17 val_17 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -19 val_19 19 val_19 -19 val_19 19 val_19 -PREHOOK: query: EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_FULLOUTERJOIN - TOK_TABREF - TOK_TABNAME - src - a - TOK_TABREF - TOK_TABNAME - srcpart - b - = - . - TOK_TABLE_OR_COL - a - key - . - TOK_TABLE_OR_COL - b - key - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - . - TOK_TABLE_OR_COL - a - key - TOK_SELEXPR - . - TOK_TABLE_OR_COL - a - value - TOK_SELEXPR - . - TOK_TABLE_OR_COL - b - key - TOK_SELEXPR - . - TOK_TABLE_OR_COL - b - value - TOK_WHERE - AND - AND - AND - AND - > - . - TOK_TABLE_OR_COL - a - key - 10 - < - . - TOK_TABLE_OR_COL - a - key - 20 - > - . - TOK_TABLE_OR_COL - b - key - 15 - < - . - TOK_TABLE_OR_COL - b - key - 25 - = - . 
- TOK_TABLE_OR_COL - b - ds - '2008-04-08' - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string) - auto parallelism: false - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /src [$hdt$_0:$hdt$_1:a] - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:$hdt$_0:b] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:$hdt$_0:b] - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: 
COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -17 val_17 17 val_17 -17 val_17 17 val_17 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -19 val_19 19 val_19 -19 val_19 19 val_19 diff --git ql/src/test/results/clientpositive/outer_join_ppr.q.out ql/src/test/results/clientpositive/outer_join_ppr.q.out new file mode 100644 index 0000000000000000000000000000000000000000..cf2085140505d794122836ae456e83594e2d12a8 --- /dev/null +++ ql/src/test/results/clientpositive/outer_join_ppr.q.out @@ -0,0 +1,683 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +POSTHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data 
size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false + TableScan + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string), _col2 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart 
+ partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /src [$hdt$_0:a] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:b] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:b] + /srcpart/ds=2008-04-09/hr=11 [$hdt$_1:b] + /srcpart/ds=2008-04-09/hr=12 [$hdt$_1:b] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + filter mappings: + 1 [0, 1] + filter predicates: + 0 + 1 {(VALUE._col1 = '2008-04-08')} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0) and (UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 +PREHOOK: query: EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false + TableScan + alias: b + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart 
+#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /src [$hdt$_0:a] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:b] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:b] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was 
here #### +POSTHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 diff --git ql/src/test/results/clientpositive/parquet_map_null.q.java1.7.out ql/src/test/results/clientpositive/parquet_map_null.q.java1.7.out deleted file mode 100644 index 825e6680183fe212f8ae1ef699c3c86654c77b38..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/parquet_map_null.q.java1.7.out +++ /dev/null @@ -1,70 +0,0 @@ -PREHOOK: query: -- This test attempts to write a parquet table from an avro table that contains map null values --- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE IF EXISTS avro_table -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- This test attempts to write a parquet table from an avro table that contains map null values --- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE IF EXISTS avro_table -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE IF EXISTS parquet_table -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE IF EXISTS parquet_table -POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE avro_table (avreau_col_1 map<string,string>) STORED AS AVRO -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@avro_table -POSTHOOK: query: CREATE TABLE avro_table (avreau_col_1 map<string,string>) STORED AS AVRO -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@avro_table -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@avro_table -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@avro_table -PREHOOK: query: CREATE TABLE parquet_table STORED AS PARQUET AS SELECT * FROM avro_table -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@avro_table -PREHOOK: Output: database:default -PREHOOK: Output: default@parquet_table -POSTHOOK: query: CREATE TABLE parquet_table STORED AS PARQUET AS SELECT * FROM avro_table -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@avro_table -POSTHOOK: Output: database:default -POSTHOOK: Output: default@parquet_table -POSTHOOK: Lineage: parquet_table.avreau_col_1 SIMPLE [(avro_table)avro_table.FieldSchema(name:avreau_col_1, type:map<string,string>, comment:), ] -PREHOOK: query: SELECT * FROM parquet_table -PREHOOK: type: QUERY -PREHOOK: Input: default@parquet_table -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM parquet_table -POSTHOOK: type: QUERY -POSTHOOK: Input: default@parquet_table -#### A masked pattern was here #### -{"key4":null,"key3":"val3"} -{"key4":null,"key3":"val3"} -{"key2":"val2","key1":null} -{"key4":null,"key3":"val3"} -{"key4":null,"key3":"val3"} -PREHOOK: query: DROP TABLE avro_table -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@avro_table
-PREHOOK: Output: default@avro_table -POSTHOOK: query: DROP TABLE avro_table -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@avro_table -POSTHOOK: Output: default@avro_table -PREHOOK: query: DROP TABLE parquet_table -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@parquet_table -PREHOOK: Output: default@parquet_table -POSTHOOK: query: DROP TABLE parquet_table -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@parquet_table -POSTHOOK: Output: default@parquet_table diff --git ql/src/test/results/clientpositive/parquet_map_null.q.java1.8.out ql/src/test/results/clientpositive/parquet_map_null.q.java1.8.out deleted file mode 100644 index 1462cc216023cc00e9cbed31df53391b050d213f..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/parquet_map_null.q.java1.8.out +++ /dev/null @@ -1,70 +0,0 @@ -PREHOOK: query: -- This test attempts to write a parquet table from an avro table that contains map null values --- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE IF EXISTS avro_table -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- This test attempts to write a parquet table from an avro table that contains map null values --- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE IF EXISTS avro_table -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE IF EXISTS parquet_table -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE IF EXISTS parquet_table -POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE avro_table (avreau_col_1 map<string,string>) STORED AS AVRO -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@avro_table -POSTHOOK: query: CREATE TABLE avro_table (avreau_col_1 map<string,string>) STORED AS AVRO -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@avro_table -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@avro_table -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@avro_table -PREHOOK: query: CREATE TABLE parquet_table STORED AS PARQUET AS SELECT * FROM avro_table -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@avro_table -PREHOOK: Output: database:default -PREHOOK: Output: default@parquet_table -POSTHOOK: query: CREATE TABLE parquet_table STORED AS PARQUET AS SELECT * FROM avro_table -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@avro_table -POSTHOOK: Output: database:default -POSTHOOK: Output: default@parquet_table -POSTHOOK: Lineage: parquet_table.avreau_col_1 SIMPLE [(avro_table)avro_table.FieldSchema(name:avreau_col_1, type:map<string,string>, comment:), ] -PREHOOK: query: SELECT * FROM parquet_table -PREHOOK: type: QUERY -PREHOOK: Input: default@parquet_table -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM parquet_table -POSTHOOK: type: QUERY -POSTHOOK: Input: default@parquet_table -#### A masked pattern was here #### -{"key3":"val3","key4":null} -{"key3":"val3","key4":null} -{"key1":null,"key2":"val2"} -{"key3":"val3","key4":null} -{"key3":"val3","key4":null} -PREHOOK: query: DROP TABLE avro_table -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@avro_table -PREHOOK: Output: default@avro_table -POSTHOOK: query: DROP TABLE avro_table -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@avro_table -POSTHOOK: Output: default@avro_table -PREHOOK: query: DROP TABLE parquet_table -PREHOOK:
type: DROPTABLE -PREHOOK: Input: default@parquet_table -PREHOOK: Output: default@parquet_table -POSTHOOK: query: DROP TABLE parquet_table -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@parquet_table -POSTHOOK: Output: default@parquet_table diff --git ql/src/test/results/clientpositive/parquet_map_null.q.out ql/src/test/results/clientpositive/parquet_map_null.q.out new file mode 100644 index 0000000000000000000000000000000000000000..d1357c1f3d3591cf0d860b107dc423540de8f00b --- /dev/null +++ ql/src/test/results/clientpositive/parquet_map_null.q.out @@ -0,0 +1,68 @@ +PREHOOK: query: -- This test attempts to write a parquet table from an avro table that contains map null values + +DROP TABLE IF EXISTS avro_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- This test attempts to write a parquet table from an avro table that contains map null values + +DROP TABLE IF EXISTS avro_table +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS parquet_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS parquet_table +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE avro_table (avreau_col_1 map<string,string>) STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@avro_table +POSTHOOK: query: CREATE TABLE avro_table (avreau_col_1 map<string,string>) STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_table +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@avro_table +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@avro_table +PREHOOK: query: CREATE TABLE parquet_table STORED AS PARQUET AS SELECT * FROM avro_table +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@avro_table +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_table +POSTHOOK: query: CREATE TABLE parquet_table STORED AS PARQUET AS SELECT * FROM avro_table +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@avro_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_table +POSTHOOK: Lineage: parquet_table.avreau_col_1 SIMPLE [(avro_table)avro_table.FieldSchema(name:avreau_col_1, type:map<string,string>, comment:), ] +PREHOOK: query: SELECT * FROM parquet_table +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_table +#### A masked pattern was here #### +{"key3":"val3","key4":null} +{"key3":"val3","key4":null} +{"key1":null,"key2":"val2"} +{"key3":"val3","key4":null} +{"key3":"val3","key4":null} +PREHOOK: query: DROP TABLE avro_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@avro_table +PREHOOK: Output: default@avro_table +POSTHOOK: query: DROP TABLE avro_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@avro_table +POSTHOOK: Output: default@avro_table +PREHOOK: query: DROP TABLE parquet_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_table +PREHOOK: Output: default@parquet_table +POSTHOOK: query: DROP TABLE parquet_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_table +POSTHOOK: Output: default@parquet_table diff --git ql/src/test/results/clientpositive/plan_json.q.java1.7.out
ql/src/test/results/clientpositive/plan_json.q.java1.7.out deleted file mode 100644 index dda4adcd6f1cf1ffb8e7ff79b3fbf930b729d47b..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/plan_json.q.java1.7.out +++ /dev/null @@ -1,13 +0,0 @@ -PREHOOK: query: -- explain plan json: the query gets the formatted json output of the query plan of the hive query - --- JAVA_VERSION_SPECIFIC_OUTPUT - -EXPLAIN FORMATTED SELECT count(1) FROM src -PREHOOK: type: QUERY -POSTHOOK: query: -- explain plan json: the query gets the formatted json output of the query plan of the hive query - --- JAVA_VERSION_SPECIFIC_OUTPUT - -EXPLAIN FORMATTED SELECT count(1) FROM src -POSTHOOK: type: QUERY -{"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE","children":{"Select Operator":{"Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE","children":{"Group By Operator":{"aggregations:":["count(1)"],"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","children":{"Reduce Output Operator":{"sort order:":"","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: bigint)"}}}}}}}}],"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} diff --git ql/src/test/results/clientpositive/plan_json.q.java1.8.out ql/src/test/results/clientpositive/plan_json.q.java1.8.out deleted file mode 100644 index dda4adcd6f1cf1ffb8e7ff79b3fbf930b729d47b..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/plan_json.q.java1.8.out +++ /dev/null @@ -1,13 +0,0 @@ -PREHOOK: query: -- explain plan json: the query gets the formatted json output of the query plan of the hive query - --- JAVA_VERSION_SPECIFIC_OUTPUT - -EXPLAIN FORMATTED SELECT count(1) FROM src -PREHOOK: type: QUERY -POSTHOOK: query: -- explain plan json: the query gets the formatted json output of the query plan of the hive query - --- JAVA_VERSION_SPECIFIC_OUTPUT - -EXPLAIN FORMATTED SELECT count(1) FROM src -POSTHOOK: type: QUERY -{"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE","children":{"Select Operator":{"Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE","children":{"Group By Operator":{"aggregations:":["count(1)"],"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
COMPLETE","children":{"Reduce Output Operator":{"sort order:":"","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: bigint)"}}}}}}}}],"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} diff --git ql/src/test/results/clientpositive/plan_json.q.out ql/src/test/results/clientpositive/plan_json.q.out new file mode 100644 index 0000000000000000000000000000000000000000..98c6626f9e766489603b11876c6f3b6932bf596e --- /dev/null +++ ql/src/test/results/clientpositive/plan_json.q.out @@ -0,0 +1,11 @@ +PREHOOK: query: -- explain plan json: the query gets the formatted json output of the query plan of the hive query + + +EXPLAIN FORMATTED SELECT count(1) FROM src +PREHOOK: type: QUERY +POSTHOOK: query: -- explain plan json: the query gets the formatted json output of the query plan of the hive query + + +EXPLAIN FORMATTED SELECT count(1) FROM src +POSTHOOK: type: QUERY +{"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-1"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE","children":{"Select Operator":{"Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE","children":{"Group By Operator":{"aggregations:":["count(1)"],"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","children":{"Reduce Output Operator":{"sort order:":"","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: bigint)"}}}}}}}}],"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}} diff --git ql/src/test/results/clientpositive/spark/join0.q.java1.7.out ql/src/test/results/clientpositive/spark/join0.q.java1.7.out deleted file mode 100644 index b3a58d02787cebe25e7437419fd284025cb888d9..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/spark/join0.q.java1.7.out +++ /dev/null @@ -1,238 +0,0 @@ -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT --- SORT_QUERY_RESULTS - -EXPLAIN -SELECT 
src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT --- SORT_QUERY_RESULTS - -EXPLAIN -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Map 4 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN FORMATTED -SELECT src1.key as k1, 
src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN FORMATTED -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -#### A masked pattern was here #### -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product -PREHOOK: query: SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 2 val_2 -0 val_0 2 val_2 -0 val_0 2 val_2 -0 val_0 4 val_4 -0 val_0 4 val_4 -0 val_0 4 val_4 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 8 val_8 -0 val_0 8 val_8 -0 val_0 8 val_8 -0 val_0 9 val_9 -0 val_0 9 val_9 -0 val_0 9 val_9 -2 val_2 0 val_0 -2 val_2 0 val_0 -2 val_2 0 val_0 -2 val_2 2 val_2 -2 val_2 4 val_4 -2 val_2 5 val_5 -2 val_2 5 val_5 -2 val_2 5 val_5 -2 val_2 8 val_8 -2 val_2 9 val_9 -4 val_4 0 val_0 -4 val_4 0 val_0 -4 val_4 0 val_0 -4 val_4 2 val_2 -4 val_4 4 val_4 -4 val_4 5 val_5 -4 val_4 5 val_5 -4 val_4 5 val_5 -4 val_4 8 val_8 -4 val_4 9 val_9 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 2 val_2 -5 val_5 2 val_2 -5 val_5 2 val_2 -5 val_5 4 val_4 -5 val_5 4 val_4 -5 val_5 4 val_4 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 8 val_8 -5 val_5 8 val_8 -5 val_5 8 val_8 -5 val_5 9 val_9 -5 val_5 9 val_9 -5 val_5 9 val_9 -8 val_8 0 val_0 -8 val_8 0 val_0 -8 val_8 0 val_0 -8 val_8 2 val_2 -8 val_8 4 val_4 -8 val_8 5 val_5 -8 val_8 5 val_5 -8 val_8 5 val_5 -8 val_8 8 val_8 -8 val_8 9 val_9 -9 val_9 0 val_0 -9 val_9 0 val_0 -9 val_9 0 val_0 -9 val_9 2 val_2 -9 val_9 4 val_4 -9 val_9 5 val_5 -9 val_9 5 val_5 -9 val_9 5 val_5 -9 val_9 8 val_8 -9 val_9 9 val_9 diff --git ql/src/test/results/clientpositive/spark/join0.q.java1.8.out ql/src/test/results/clientpositive/spark/join0.q.java1.8.out deleted file mode 100644 index 7acd1087e8e37dbe4ddfd82f0f20f0082b8a3c7b..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/spark/join0.q.java1.8.out +++ /dev/null @@ -1,238 +0,0 @@ -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT --- SORT_QUERY_RESULTS - -EXPLAIN -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE 
src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT --- SORT_QUERY_RESULTS - -EXPLAIN -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Map 4 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN FORMATTED -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - 
JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN FORMATTED -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -#### A masked pattern was here #### -Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product -PREHOOK: query: SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 2 val_2 -0 val_0 2 val_2 -0 val_0 2 val_2 -0 val_0 4 val_4 -0 val_0 4 val_4 -0 val_0 4 val_4 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 8 val_8 -0 val_0 8 val_8 -0 val_0 8 val_8 -0 val_0 9 val_9 -0 val_0 9 val_9 -0 val_0 9 val_9 -2 val_2 0 val_0 -2 val_2 0 val_0 -2 val_2 0 val_0 -2 val_2 2 val_2 -2 val_2 4 val_4 -2 val_2 5 val_5 -2 val_2 5 val_5 -2 val_2 5 val_5 -2 val_2 8 val_8 -2 val_2 9 val_9 -4 val_4 0 val_0 -4 val_4 0 val_0 -4 val_4 0 val_0 -4 val_4 2 val_2 -4 val_4 4 val_4 -4 val_4 5 val_5 -4 val_4 5 val_5 -4 val_4 5 val_5 -4 val_4 8 val_8 -4 val_4 9 val_9 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 2 val_2 -5 val_5 2 val_2 -5 val_5 2 val_2 -5 val_5 4 val_4 -5 val_5 4 val_4 -5 val_5 4 val_4 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 8 val_8 -5 val_5 8 val_8 -5 val_5 8 val_8 -5 val_5 9 val_9 -5 val_5 9 val_9 -5 val_5 9 val_9 -8 val_8 0 val_0 -8 val_8 0 val_0 -8 val_8 0 val_0 -8 val_8 2 val_2 -8 val_8 4 val_4 -8 val_8 5 val_5 -8 val_8 5 val_5 -8 val_8 5 val_5 -8 val_8 8 val_8 -8 val_8 9 val_9 -9 val_9 0 val_0 -9 val_9 0 val_0 -9 val_9 0 val_0 -9 val_9 2 val_2 -9 val_9 4 val_4 -9 val_9 5 val_5 -9 val_9 5 val_5 -9 val_9 5 val_5 -9 val_9 8 val_8 -9 val_9 9 val_9 diff --git ql/src/test/results/clientpositive/spark/join0.q.out ql/src/test/results/clientpositive/spark/join0.q.out index 56b154ff799097c78dc5d66a7b7cc3c303b88b00..bc98bb4fd904a5e70734d986dc7dbe306cf184db 100644 --- ql/src/test/results/clientpositive/spark/join0.q.out +++ ql/src/test/results/clientpositive/spark/join0.q.out @@ -1,5 +1,7 @@ Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM (SELECT * FROM src WHERE src.key < 10) src1 @@ -7,7 +9,9 @@ SELECT src1.key as k1, src1.value as v1, (SELECT * FROM src WHERE src.key < 10) src2 SORT 
BY k1, v1, k2, v2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM (SELECT * FROM src WHERE src.key < 10) src1 @@ -24,7 +28,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -64,9 +68,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} {VALUE._col1} + keys: + 0 + 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -83,8 +87,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 diff --git ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.java1.7.out ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.java1.7.out deleted file mode 100644 index 30405443a8b69f75295278aee1eb035d096cea49..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.java1.7.out +++ /dev/null @@ -1,252 +0,0 @@ -PREHOOK: query: -- run this test case in minimr to ensure it works in cluster --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key) on ('484','51','103') - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- run this test case in minimr to ensure it works in cluster --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key) on ('484','51','103') - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_static_part -PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
-explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked 
pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_static_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_static_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_static_part -ds=2008-04-08/hr=11 -PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_static_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_static_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 - numRows 500 - rawDataSize 4812 - totalSize 5520 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key] -Skewed Values: [[484], [51], [103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: 
{[484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484, [103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103, [51]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=51} -Storage Desc Params: - serialization.format 1 diff --git ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.java1.8.out ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.java1.8.out deleted file mode 100644 index 12f41eb7261e8bcb17cfccae74d79593d61c6c04..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.java1.8.out +++ /dev/null @@ -1,280 +0,0 @@ -PREHOOK: query: -- run this test case in minimr to ensure it works in cluster --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key) on ('484','51','103') - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- run this test case in minimr to ensure it works in cluster --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key) on ('484','51','103') - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_static_part -PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
-explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - src - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - list_bucketing_static_part - TOK_PARTSPEC - TOK_PARTVAL - ds - '2008-04-08' - TOK_PARTVAL - hr - '11' - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - value - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_static_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_static_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_static_part -ds=2008-04-08/hr=11 -PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_static_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_static_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 4 - numRows 500 - rawDataSize 4812 - totalSize 5520 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key] -Skewed Values: [[484], [51], [103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: 
{[103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103, [51]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=51, [484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484} -Storage Desc Params: - serialization.format 1 diff --git ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.out ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.out new file mode 100644 index 0000000000000000000000000000000000000000..9eca85a0be92b95d78f3c566427eeb327d7e7f1a --- /dev/null +++ ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.out @@ -0,0 +1,250 @@ +PREHOOK: query: -- run this test case in minimr to ensure it works in cluster + +-- list bucketing DML: static partition. multiple skewed columns. +-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103/value=val_103: +-- 99 000000_0 +-- 99 000001_0 +-- ds=2008-04-08/hr=11/key=484/value=val_484: +-- 87 000000_0 +-- 87 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key) on ('484','51','103') + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_static_part +POSTHOOK: query: -- run this test case in minimr to ensure it works in cluster + +-- list bucketing DML: static partition. multiple skewed columns. +-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103/value=val_103: +-- 99 000000_0 +-- 99 000001_0 +-- ds=2008-04-08/hr=11/key=484/value=val_484: +-- 87 000000_0 +-- 87 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key) on ('484','51','103') + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_static_part +PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
+explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=11/ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 11 + replace: true 
+#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- check DML result +show partitions list_bucketing_static_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: -- check DML result +show partitions list_bucketing_static_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_static_part +ds=2008-04-08/hr=11 +PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_static_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: list_bucketing_static_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 4 + numRows 500 + rawDataSize 4812 + totalSize 5520 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key] +Skewed Values: [[484], [51], [103]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103, [51]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=51, [484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484} +Storage Desc Params: + serialization.format 1 diff --git 
ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.java1.7.out ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.java1.7.out deleted file mode 100644 index d8da70cb2e1d7e72535326ccc99a0fc4bf8200fa..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.java1.7.out +++ /dev/null @@ -1,591 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_static_part -PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
-explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base 
file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [srcpart] - /srcpart/ds=2008-04-08/hr=12 [srcpart] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: 
list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_static_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_static_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_static_part -ds=2008-04-08/hr=11 -PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_static_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_static_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 6 - numRows 1000 - rawDataSize 9624 - totalSize 10898 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[103, val_103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103/value=val_103, [484, val_484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484/value=val_484} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -1000 -PREHOOK: query: select count(*) from list_bucketing_static_part -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from list_bucketing_static_part -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -1000 -PREHOOK: query: explain extended -select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition 
Description: - Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - numFiles 6 - numRows 1000 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 9624 - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10898 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - Processor Tree: - TableScan - alias: list_bucketing_static_part - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink - -PREHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -484 val_484 2008-04-08 11 -484 val_484 2008-04-08 11 -PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -484 val_484 2008-04-08 11 -484 val_484 2008-04-08 12 -PREHOOK: query: -- 51 and val_51 in the table so skewed data for 51 and val_14 should be none --- but query should succeed for 51 or 51 and val_14 -select * from srcpart where ds = '2008-04-08' and key = '51' -PREHOOK: type: QUERY -PREHOOK: 
Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: -- 51 and val_51 in the table so skewed data for 51 and val_14 should be none --- but query should succeed for 51 or 51 and val_14 -select * from srcpart where ds = '2008-04-08' and key = '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -51 val_51 2008-04-08 11 -51 val_51 2008-04-08 11 -51 val_51 2008-04-08 12 -51 val_51 2008-04-08 12 -PREHOOK: query: select * from list_bucketing_static_part where key = '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_static_part where key = '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -51 val_51 2008-04-08 11 -51 val_51 2008-04-08 11 -51 val_51 2008-04-08 11 -51 val_51 2008-04-08 11 -PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '51' and value = 'val_14' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '51' and value = 'val_14' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -PREHOOK: query: select * from list_bucketing_static_part where key = '51' and value = 'val_14' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_static_part where key = '51' and value = 'val_14' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -PREHOOK: query: -- queries with < <= > >= should work for skewed test although we don't benefit from pruning -select count(1) from srcpart where ds = '2008-04-08' and key < '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: -- queries with < <= > >= should work for skewed test although we don't benefit from pruning -select count(1) from srcpart where ds = '2008-04-08' and key < '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -910 -PREHOOK: query: select count(1) from list_bucketing_static_part where key < '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from list_bucketing_static_part 
where key < '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -910 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key <= '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key <= '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -914 -PREHOOK: query: select count(1) from list_bucketing_static_part where key <= '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from list_bucketing_static_part where key <= '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -914 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key > '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key > '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -86 -PREHOOK: query: select count(1) from list_bucketing_static_part where key > '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from list_bucketing_static_part where key > '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -86 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key >= '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key >= '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -90 -PREHOOK: query: select count(1) from list_bucketing_static_part where key >= '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from list_bucketing_static_part where key >= '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### 
-90 -PREHOOK: query: -- clean up -drop table list_bucketing_static_part -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- clean up -drop table list_bucketing_static_part -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Output: default@list_bucketing_static_part diff --git ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.java1.8.out ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.java1.8.out deleted file mode 100644 index 23dc6a3028b3d4169f622c46ba0ac35428449992..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.java1.8.out +++ /dev/null @@ -1,663 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) - stored as DIRECTORIES - STORED AS RCFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - --- list bucketing DML: static partition. multiple skewed columns. --- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: --- 5263 000000_0 --- 5263 000001_0 --- ds=2008-04-08/hr=11/key=103/value=val_103: --- 99 000000_0 --- 99 000001_0 --- ds=2008-04-08/hr=11/key=484/value=val_484: --- 87 000000_0 --- 87 000001_0 - --- create a skewed table -create table list_bucketing_static_part (key String, value String) - partitioned by (ds String, hr String) - skewed by (key, value) on (('484','val_484'),('51','val_14'),('103','val_103')) - stored as DIRECTORIES - STORED AS RCFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@list_bucketing_static_part -PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. -explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. 
-explain extended -insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - srcpart - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - list_bucketing_static_part - TOK_PARTSPEC - TOK_PARTVAL - ds - '2008-04-08' - TOK_PARTVAL - hr - '11' - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - value - TOK_WHERE - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2008-04-08/hr=11/ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct 
srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [srcpart] - /srcpart/ds=2008-04-08/hr=12 [srcpart] - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2008-04-08 - hr 11 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') -select key, value from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- check DML result -show partitions list_bucketing_static_part -PREHOOK: type: SHOWPARTITIONS -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: -- check DML result -show partitions list_bucketing_static_part -POSTHOOK: type: SHOWPARTITIONS -POSTHOOK: Input: default@list_bucketing_static_part -ds=2008-04-08/hr=11 -PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@list_bucketing_static_part -POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@list_bucketing_static_part -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: list_bucketing_static_part -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 6 - numRows 1000 - rawDataSize 9624 - totalSize 10898 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [key, value] -Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484/value=val_484, [103, val_103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103/value=val_103} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -1000 -PREHOOK: query: select count(*) from list_bucketing_static_part -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from list_bucketing_static_part -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -1000 -PREHOOK: query: explain extended -select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -POSTHOOK: query: explain 
extended -select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - list_bucketing_static_part - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - and - and - and - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - = - TOK_TABLE_OR_COL - hr - '11' - = - TOK_TABLE_OR_COL - key - '484' - = - TOK_TABLE_OR_COL - value - 'val_484' - - -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: - Partition - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - numFiles 6 - numRows 1000 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 9624 - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - totalSize 10898 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.list_bucketing_static_part - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct list_bucketing_static_part { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.list_bucketing_static_part - name: default.list_bucketing_static_part - Processor Tree: - TableScan - alias: list_bucketing_static_part - Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - ListSink - -PREHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -484 val_484 2008-04-08 11 -484 val_484 
2008-04-08 11
-PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-#### A masked pattern was here ####
-POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-#### A masked pattern was here ####
-484 val_484 2008-04-08 11
-484 val_484 2008-04-08 12
-PREHOOK: query: -- 51 and val_51 in the table so skewed data for 51 and val_14 should be none
--- but query should succeed for 51 or 51 and val_14
-select * from srcpart where ds = '2008-04-08' and key = '51'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-#### A masked pattern was here ####
-POSTHOOK: query: -- 51 and val_51 in the table so skewed data for 51 and val_14 should be none
--- but query should succeed for 51 or 51 and val_14
-select * from srcpart where ds = '2008-04-08' and key = '51'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-#### A masked pattern was here ####
-51 val_51 2008-04-08 11
-51 val_51 2008-04-08 11
-51 val_51 2008-04-08 12
-51 val_51 2008-04-08 12
-PREHOOK: query: select * from list_bucketing_static_part where key = '51'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@list_bucketing_static_part
-PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11
-#### A masked pattern was here ####
-POSTHOOK: query: select * from list_bucketing_static_part where key = '51'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@list_bucketing_static_part
-POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11
-#### A masked pattern was here ####
-51 val_51 2008-04-08 11
-51 val_51 2008-04-08 11
-51 val_51 2008-04-08 11
-51 val_51 2008-04-08 11
-PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '51' and value = 'val_14'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-#### A masked pattern was here ####
-POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '51' and value = 'val_14'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-#### A masked pattern was here ####
-PREHOOK: query: select * from list_bucketing_static_part where key = '51' and value = 'val_14'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@list_bucketing_static_part
-PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11
-#### A masked pattern was here ####
-POSTHOOK: query: select * from list_bucketing_static_part where key = '51' and value = 'val_14'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@list_bucketing_static_part
-POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11
-#### A masked pattern was here ####
-PREHOOK: query: -- queries with < <= > >= should work for skewed test although we don't benefit from pruning
-select count(1) from srcpart where ds = '2008-04-08' and key < '51'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-#### A masked pattern was here ####
-POSTHOOK: query: -- queries with < <= > >= should work for skewed test although we don't benefit from pruning
-select count(1) from srcpart where ds = '2008-04-08' and key < '51'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-#### A masked pattern was here ####
-910
-PREHOOK: query: select count(1) from list_bucketing_static_part where key < '51'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@list_bucketing_static_part
-PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11
-#### A masked pattern was here ####
-POSTHOOK: query: select count(1) from list_bucketing_static_part where key < '51'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@list_bucketing_static_part
-POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11
-#### A masked pattern was here ####
-910
-PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key <= '51'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-#### A masked pattern was here ####
-POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key <= '51'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-#### A masked pattern was here ####
-914
-PREHOOK: query: select count(1) from list_bucketing_static_part where key <= '51'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@list_bucketing_static_part
-PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11
-#### A masked pattern was here ####
-POSTHOOK: query: select count(1) from list_bucketing_static_part where key <= '51'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@list_bucketing_static_part
-POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11
-#### A masked pattern was here ####
-914
-PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key > '51'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-#### A masked pattern was here ####
-POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key > '51'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-#### A masked pattern was here ####
-86
-PREHOOK: query: select count(1) from list_bucketing_static_part where key > '51'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@list_bucketing_static_part
-PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11
-#### A masked pattern was here ####
-POSTHOOK: query: select count(1) from list_bucketing_static_part where key > '51'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@list_bucketing_static_part
-POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11
-#### A masked pattern was here ####
-86
-PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key >= '51'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from srcpart where ds = '2008-04-08' and key >= '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -90 -PREHOOK: query: select count(1) from list_bucketing_static_part where key >= '51' -PREHOOK: type: QUERY -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from list_bucketing_static_part where key >= '51' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11 -#### A masked pattern was here #### -90 -PREHOOK: query: -- clean up -drop table list_bucketing_static_part -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@list_bucketing_static_part -PREHOOK: Output: default@list_bucketing_static_part -POSTHOOK: query: -- clean up -drop table list_bucketing_static_part -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@list_bucketing_static_part -POSTHOOK: Output: default@list_bucketing_static_part diff --git ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out index 3ee9b5ae3f297df97c8c8fd71bf5231abac702d3..c83c02ed24e1dd0e9d453e3e49c9e7f89a82e26e 100644 --- ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out +++ ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out @@ -54,39 +54,6 @@ explain extended insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - srcpart - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - list_bucketing_static_part - TOK_PARTSPEC - TOK_PARTVAL - ds - '2008-04-08' - TOK_PARTVAL - hr - '11' - TOK_SELECT - TOK_SELEXPR - TOK_TABLE_OR_COL - key - TOK_SELEXPR - TOK_TABLE_OR_COL - value - TOK_WHERE - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -121,7 +88,7 @@ STAGE PLANS: properties: bucket_count -1 columns key,value - columns.comments + columns.comments columns.types string:string #### A masked pattern was here #### name default.list_bucketing_static_part @@ -148,10 +115,10 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE true + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 columns key,value - columns.comments defaultdefault + columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### name default.srcpart @@ -172,7 +139,7 @@ STAGE PLANS: properties: bucket_count -1 columns key,value - columns.comments defaultdefault + columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### name default.srcpart @@ -194,10 +161,10 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE true + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 columns key,value - columns.comments defaultdefault + columns.comments 'default','default' columns.types string:string #### A masked 
pattern was here #### name default.srcpart @@ -218,7 +185,7 @@ STAGE PLANS: properties: bucket_count -1 columns key,value - columns.comments defaultdefault + columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### name default.srcpart @@ -249,7 +216,7 @@ STAGE PLANS: properties: bucket_count -1 columns key,value - columns.comments + columns.comments columns.types string:string #### A masked pattern was here #### name default.list_bucketing_static_part @@ -314,7 +281,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE true + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} numFiles 6 numRows 1000 rawDataSize 9624 @@ -333,7 +300,7 @@ Stored As SubDirectories: Yes Skewed Columns: [key, value] Skewed Values: [[484, val_484], [51, val_14], [103, val_103]] #### A masked pattern was here #### -Skewed Value to Truncated Path: {[103, val_103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103/value=val_103, [484, val_484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484/value=val_484} +Skewed Value to Truncated Path: {[484, val_484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484/value=val_484, [103, val_103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103/value=val_103} Storage Desc Params: serialization.format 1 PREHOOK: query: select count(1) from srcpart where ds = '2008-04-08' @@ -366,42 +333,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_TABREF - TOK_TABNAME - list_bucketing_static_part - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_ALLCOLREF - TOK_WHERE - and - and - and - = - TOK_TABLE_OR_COL - ds - '2008-04-08' - = - TOK_TABLE_OR_COL - hr - '11' - = - TOK_TABLE_OR_COL - key - '484' - = - TOK_TABLE_OR_COL - value - 'val_484' - - STAGE DEPENDENCIES: Stage-0 is a root stage @@ -417,10 +348,10 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE true + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucket_count -1 columns key,value - columns.comments + columns.comments columns.types string:string #### A masked pattern was here #### name default.list_bucketing_static_part @@ -441,7 +372,7 @@ STAGE PLANS: properties: bucket_count -1 columns key,value - columns.comments + columns.comments columns.types string:string #### A masked pattern was here #### name default.list_bucketing_static_part @@ -457,16 +388,13 @@ STAGE PLANS: Processor Tree: TableScan alias: list_bucketing_static_part - Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' diff --git ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.7.out 
ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.7.out deleted file mode 100644 index 68943e1419cbeaafd630dea0944e523e71abba15..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.7.out +++ /dev/null @@ -1,709 +0,0 @@ -PREHOOK: query: -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - -EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -PREHOOK: type: QUERY -POSTHOOK: query: -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - -EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - 
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string), _col2 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name 
default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - 
name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - /srcpart/ds=2008-04-09/hr=11 [b] - /srcpart/ds=2008-04-09/hr=12 [b] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - filter mappings: - 1 [0, 1] - filter predicates: - 0 - 1 {(VALUE._col1 = '2008-04-08')} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0) and (UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -#### A masked pattern was here #### -17 val_17 17 val_17 -17 val_17 17 val_17 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -19 val_19 19 val_19 -19 val_19 19 val_19 -PREHOOK: query: EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 
AND b.ds = '2008-04-08' -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 
(type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"key":"true","value":"true"},"BASIC_STATS":"true"} - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - Reducer 2 - 
Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -17 val_17 17 val_17 -17 val_17 17 val_17 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -19 val_19 19 val_19 -19 val_19 19 val_19 diff --git ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.8.out ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.8.out deleted file mode 100644 index c3454eee3e7ea512e31b39996f83ade8e77a3b8f..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/spark/outer_join_ppr.q.java1.8.out +++ /dev/null @@ -1,879 +0,0 @@ -PREHOOK: query: -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - -EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -PREHOOK: type: QUERY -POSTHOOK: query: -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - -EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY 
- TOK_FROM - TOK_FULLOUTERJOIN - TOK_TABREF - TOK_TABNAME - src - a - TOK_TABREF - TOK_TABNAME - srcpart - b - AND - = - . - TOK_TABLE_OR_COL - a - key - . - TOK_TABLE_OR_COL - b - key - = - . - TOK_TABLE_OR_COL - b - ds - '2008-04-08' - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - . - TOK_TABLE_OR_COL - a - key - TOK_SELEXPR - . - TOK_TABLE_OR_COL - a - value - TOK_SELEXPR - . - TOK_TABLE_OR_COL - b - key - TOK_SELEXPR - . - TOK_TABLE_OR_COL - b - value - TOK_WHERE - AND - AND - AND - > - . - TOK_TABLE_OR_COL - a - key - 10 - < - . - TOK_TABLE_OR_COL - a - key - 20 - > - . - TOK_TABLE_OR_COL - b - key - 15 - < - . - TOK_TABLE_OR_COL - b - key - 25 - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - 
COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A 
masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - /srcpart/ds=2008-04-09/hr=11 [b] - /srcpart/ds=2008-04-09/hr=12 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - filter mappings: - 0 [1, 1] - filter predicates: - 0 {(VALUE._col1 = '2008-04-08')} - 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: 
false - predicate: ((((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) and (UDFToDouble(_col0) > 15.0)) and (UDFToDouble(_col0) < 25.0)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key AND b.ds = '2008-04-08') - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -#### A masked pattern was here #### -17 val_17 17 val_17 -17 val_17 17 val_17 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -19 val_19 19 val_19 -19 val_19 19 val_19 -PREHOOK: query: EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED - FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_FULLOUTERJOIN - TOK_TABREF - TOK_TABNAME - src - a - TOK_TABREF - TOK_TABNAME - srcpart - b - = - . - TOK_TABLE_OR_COL - a - key - . - TOK_TABLE_OR_COL - b - key - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - . - TOK_TABLE_OR_COL - a - key - TOK_SELEXPR - . 
- TOK_TABLE_OR_COL - a - value - TOK_SELEXPR - . - TOK_TABLE_OR_COL - b - key - TOK_SELEXPR - . - TOK_TABLE_OR_COL - b - value - TOK_WHERE - AND - AND - AND - AND - > - . - TOK_TABLE_OR_COL - a - key - 10 - < - . - TOK_TABLE_OR_COL - a - key - 20 - > - . - TOK_TABLE_OR_COL - b - key - 15 - < - . - TOK_TABLE_OR_COL - b - key - 25 - = - . - TOK_TABLE_OR_COL - b - ds - '2008-04-08' - - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart -#### A masked pattern was here #### - Partition - base file name: hr=12 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### 
- name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated 
Path -> Alias: - /src [a] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((UDFToDouble(_col3) > 10.0) and (UDFToDouble(_col3) < 20.0)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col4 (type: string), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: FROM - src a - FULL OUTER JOIN - srcpart b - ON (a.key = b.key) - SELECT a.key, a.value, b.key, b.value - WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -17 val_17 17 val_17 -17 val_17 17 val_17 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -18 val_18 18 val_18 -19 val_19 19 val_19 -19 val_19 19 val_19 diff --git ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out index 217fe76b8212020d6b0ff3f518e930521103f8d5..dfa6ea5e94dcd5f044983179b8090d582f14f7f4 100644 --- ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out +++ ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out @@ -20,90 +20,6 @@ EXPLAIN EXTENDED SELECT a.key, a.value, b.key, b.value WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_FULLOUTERJOIN - TOK_TABREF - TOK_TABNAME - src - a - TOK_TABREF - TOK_TABNAME - srcpart - b - AND - = - . - TOK_TABLE_OR_COL - a - key - . - TOK_TABLE_OR_COL - b - key - = - . 
-                  TOK_TABLE_OR_COL
-                     b
-                  ds
-               '2008-04-08'
-   TOK_INSERT
-      TOK_DESTINATION
-         TOK_DIR
-            TOK_TMP_FILE
-      TOK_SELECT
-         TOK_SELEXPR
-            .
-               TOK_TABLE_OR_COL
-                  a
-               key
-         TOK_SELEXPR
-            .
-               TOK_TABLE_OR_COL
-                  a
-               value
-         TOK_SELEXPR
-            .
-               TOK_TABLE_OR_COL
-                  b
-               key
-         TOK_SELEXPR
-            .
-               TOK_TABLE_OR_COL
-                  b
-               value
-      TOK_WHERE
-         AND
-            AND
-               AND
-                  >
-                     .
-                        TOK_TABLE_OR_COL
-                           a
-                        key
-                     10
-                  <
-                     .
-                        TOK_TABLE_OR_COL
-                           a
-                        key
-                     20
-               >
-                  .
-                     TOK_TABLE_OR_COL
-                        b
-                     key
-                  15
-            <
-               .
-                  TOK_TABLE_OR_COL
-                     b
-                  key
-               25
-
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -112,7 +28,7 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -121,14 +37,19 @@ STAGE PLANS:
                   alias: a
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
-                  Reduce Output Operator
-                    key expressions: key (type: string)
-                    sort order: +
-                    Map-reduce partition columns: key (type: string)
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    tag: 0
-                    value expressions: value (type: string)
-                    auto parallelism: false
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      null sort order: a
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      tag: 0
+                      value expressions: _col1 (type: string)
+                      auto parallelism: false
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -138,7 +59,7 @@ STAGE PLANS:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties:
-                    COLUMN_STATS_ACCURATE true
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                     bucket_count -1
                     columns key,value
                     columns.comments 'default','default'
@@ -158,7 +79,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE true
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                       bucket_count -1
                       columns key,value
                       columns.comments 'default','default'
@@ -184,14 +105,19 @@ STAGE PLANS:
                   alias: b
                   Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
-                  Reduce Output Operator
-                    key expressions: key (type: string)
-                    sort order: +
-                    Map-reduce partition columns: key (type: string)
+                  Select Operator
+                    expressions: key (type: string), value (type: string), ds (type: string)
+                    outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                    tag: 1
-                    value expressions: value (type: string), ds (type: string)
-                    auto parallelism: false
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      null sort order: a
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                      tag: 1
+                      value expressions: _col1 (type: string), _col2 (type: string)
+                      auto parallelism: false
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -204,7 +130,7 @@
                 ds 2008-04-08
                 hr 11
               properties:
-                COLUMN_STATS_ACCURATE true
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                 bucket_count -1
                 columns key,value
                 columns.comments 'default','default'
@@ -250,7 +176,7 @@ STAGE PLANS:
                 ds 2008-04-08
                 hr 12
               properties:
-                COLUMN_STATS_ACCURATE true
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                 bucket_count -1
                 columns key,value
                 columns.comments 'default','default'
@@ -296,7 +222,7 @@ STAGE PLANS:
                 ds 2008-04-09
                 hr 11
               properties:
-                COLUMN_STATS_ACCURATE true
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                 bucket_count -1
                 columns key,value
                 columns.comments 'default','default'
@@ -342,7 +268,7 @@ STAGE PLANS:
                 ds 2008-04-09
                 hr 12
               properties:
-                COLUMN_STATS_ACCURATE true
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                 bucket_count -1
                 columns key,value
                 columns.comments 'default','default'
@@ -396,39 +322,36 @@ STAGE PLANS:
               0
               1 {(VALUE._col1 = '2008-04-08')}
             keys:
-              0 key (type: string)
-              1 key (type: string)
-            outputColumnNames: _col0, _col1, _col5, _col6
+              0 _col0 (type: string)
+              1 _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
             Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               isSamplingPred: false
-              predicate: ((((_col5 > 15) and (_col5 < 25)) and (_col0 > 10)) and (_col0 < 20)) (type: boolean)
+              predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0) and (UDFToDouble(_col2) > 15.0) and (UDFToDouble(_col2) < 25.0)) (type: boolean)
               Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
                 Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  GlobalTableId: 0
-#### A masked pattern was here ####
-                  NumFilesPerFileSink: 1
-                  Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      properties:
-                        columns _col0,_col1,_col2,_col3
-                        columns.types string:string:string:string
-                        escape.delim \
-                        hive.serialization.extend.additional.nesting.levels true
-                        serialization.format 1
-                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  TotalFiles: 1
-                  GatherStats: false
-                  MultiFileSpray: false
+#### A masked pattern was here ####
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      columns _col0,_col1,_col2,_col3
+                      columns.types string:string:string:string
+                      escape.delim \
+                      hive.serialization.extend.additional.nesting.levels true
+                      serialization.escape.crlf true
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
   Stage: Stage-0
     Fetch Operator
@@ -496,90 +419,6 @@ POSTHOOK: query: EXPLAIN EXTENDED
   SELECT a.key, a.value, b.key, b.value
   WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08'
 POSTHOOK: type: QUERY
-ABSTRACT SYNTAX TREE:
-
-TOK_QUERY
-   TOK_FROM
-      TOK_FULLOUTERJOIN
-         TOK_TABREF
-            TOK_TABNAME
-               src
-            a
-         TOK_TABREF
-            TOK_TABNAME
-               srcpart
-            b
-         =
-            .
-               TOK_TABLE_OR_COL
-                  a
-               key
-            .
-               TOK_TABLE_OR_COL
-                  b
-               key
-   TOK_INSERT
-      TOK_DESTINATION
-         TOK_DIR
-            TOK_TMP_FILE
-      TOK_SELECT
-         TOK_SELEXPR
-            .
-               TOK_TABLE_OR_COL
-                  a
-               key
-         TOK_SELEXPR
-            .
-               TOK_TABLE_OR_COL
-                  a
-               value
-         TOK_SELEXPR
-            .
-               TOK_TABLE_OR_COL
-                  b
-               key
-         TOK_SELEXPR
-            .
-               TOK_TABLE_OR_COL
-                  b
-               value
-      TOK_WHERE
-         AND
-            AND
-               AND
-                  AND
-                     >
-                        .
-                           TOK_TABLE_OR_COL
-                              a
-                           key
-                        10
-                     <
-                        .
-                           TOK_TABLE_OR_COL
-                              a
-                           key
-                        20
-                  >
-                     .
-                        TOK_TABLE_OR_COL
-                           b
-                        key
-                     15
-               <
-                  .
-                     TOK_TABLE_OR_COL
-                        b
-                     key
-                  25
-            =
-               .
-                  TOK_TABLE_OR_COL
-                     b
-                  ds
-               '2008-04-08'
-
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -588,7 +427,7 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
      Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -597,14 +436,23 @@ STAGE PLANS:
                   alias: a
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
-                  Reduce Output Operator
-                    key expressions: key (type: string)
-                    sort order: +
-                    Map-reduce partition columns: key (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    tag: 0
-                    value expressions: value (type: string)
-                    auto parallelism: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        null sort order: a
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        tag: 0
+                        value expressions: _col1 (type: string)
+                        auto parallelism: false
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -614,7 +462,7 @@ STAGE PLANS:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties:
-                    COLUMN_STATS_ACCURATE true
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                     bucket_count -1
                     columns key,value
                     columns.comments 'default','default'
@@ -634,7 +482,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      COLUMN_STATS_ACCURATE true
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                       bucket_count -1
                       columns key,value
                       columns.comments 'default','default'
@@ -658,16 +506,25 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: b
-                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
-                  Reduce Output Operator
-                    key expressions: key (type: string)
-                    sort order: +
-                    Map-reduce partition columns: key (type: string)
-                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                    tag: 1
-                    value expressions: value (type: string), ds (type: string)
-                    auto parallelism: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: ((UDFToDouble(key) > 15.0) and (UDFToDouble(key) < 25.0)) (type: boolean)
+                    Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        null sort order: a
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                        tag: 1
+                        value expressions: _col1 (type: string)
+                        auto parallelism: false
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -680,7 +537,7 @@ STAGE PLANS:
                 ds 2008-04-08
                 hr 11
               properties:
-                COLUMN_STATS_ACCURATE true
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                 bucket_count -1
                 columns key,value
                 columns.comments 'default','default'
@@ -726,99 +583,7 @@ STAGE PLANS:
                 ds 2008-04-08
                 hr 12
               properties:
-                COLUMN_STATS_ACCURATE true
-                bucket_count -1
-                columns key,value
-                columns.comments 'default','default'
-                columns.types string:string
-#### A masked pattern was here ####
-                name default.srcpart
-                numFiles 1
-                numRows 500
-                partition_columns ds/hr
-                partition_columns.types string:string
-                rawDataSize 5312
-                serialization.ddl struct srcpart { string key, string value}
-                serialization.format 1
-                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                totalSize 5812
-#### A masked pattern was here ####
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-                input format: org.apache.hadoop.mapred.TextInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                properties:
-                  bucket_count -1
-                  columns key,value
-                  columns.comments 'default','default'
-                  columns.types string:string
-#### A masked pattern was here ####
-                  name default.srcpart
-                  partition_columns ds/hr
-                  partition_columns.types string:string
-                  serialization.ddl struct srcpart { string key, string value}
-                  serialization.format 1
-                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                name: default.srcpart
-              name: default.srcpart
-#### A masked pattern was here ####
-            Partition
-              base file name: hr=11
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              partition values:
-                ds 2008-04-09
-                hr 11
-              properties:
-                COLUMN_STATS_ACCURATE true
-                bucket_count -1
-                columns key,value
-                columns.comments 'default','default'
-                columns.types string:string
-#### A masked pattern was here ####
-                name default.srcpart
-                numFiles 1
-                numRows 500
-                partition_columns ds/hr
-                partition_columns.types string:string
-                rawDataSize 5312
-                serialization.ddl struct srcpart { string key, string value}
-                serialization.format 1
-                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                totalSize 5812
-#### A masked pattern was here ####
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-                input format: org.apache.hadoop.mapred.TextInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                properties:
-                  bucket_count -1
-                  columns key,value
-                  columns.comments 'default','default'
-                  columns.types string:string
-#### A masked pattern was here ####
-                  name default.srcpart
-                  partition_columns ds/hr
-                  partition_columns.types string:string
-                  serialization.ddl struct srcpart { string key, string value}
-                  serialization.format 1
-                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                name: default.srcpart
-              name: default.srcpart
-#### A masked pattern was here ####
-            Partition
-              base file name: hr=12
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              partition values:
-                ds 2008-04-09
-                hr 12
-              properties:
-                COLUMN_STATS_ACCURATE true
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                 bucket_count -1
                 columns key,value
                 columns.comments 'default','default'
@@ -858,48 +623,43 @@ STAGE PLANS:
             Truncated Path -> Alias:
               /srcpart/ds=2008-04-08/hr=11 [b]
               /srcpart/ds=2008-04-08/hr=12 [b]
-              /srcpart/ds=2008-04-09/hr=11 [b]
-              /srcpart/ds=2008-04-09/hr=12 [b]
         Reducer 2
             Needs Tagging: true
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Outer Join 0 to 1
+                     Right Outer Join0 to 1
                 keys:
-                  0 key (type: string)
-                  1 key (type: string)
-                outputColumnNames: _col0, _col1, _col5, _col6, _col7
-                Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
                   isSamplingPred: false
-                  predicate: (((((_col5 > 15) and (_col5 < 25)) and (_col7 = '2008-04-08')) and (_col0 > 10)) and (_col0 < 20)) (type: boolean)
+                  predicate: ((UDFToDouble(_col0) > 10.0) and (UDFToDouble(_col0) < 20.0)) (type: boolean)
                   Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
-                    outputColumnNames: _col0, _col1, _col2, _col3
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
                     Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 0
-#### A masked pattern was here ####
-                      NumFilesPerFileSink: 1
-                      Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          properties:
-                            columns _col0,_col1,_col2,_col3
-                            columns.types string:string:string:string
-                            escape.delim \
-                            hive.serialization.extend.additional.nesting.levels true
-                            serialization.format 1
-                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      TotalFiles: 1
-                      GatherStats: false
-                      MultiFileSpray: false
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          columns _col0,_col1,_col2,_col3
+                          columns.types string:string:string:string
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
   Stage: Stage-0
     Fetch Operator
@@ -919,8 +679,6 @@ PREHOOK: Input: default@src
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: FROM
   src a
@@ -934,8 +692,6 @@ POSTHOOK: Input: default@src
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 17 val_17 17 val_17
 17 val_17 17 val_17
diff --git ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.7.out ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.7.out
deleted file mode 100644
index b43ea5cba519934ba8d69b95b56acf68d2239e79..0000000000000000000000000000000000000000
--- ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.7.out
+++ /dev/null
@@ -1,886 +0,0 @@
-PREHOOK: query: -- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-CREATE TABLE src_4(
-  key STRING,
-  value STRING
-)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@src_4
-POSTHOOK: query: -- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-CREATE TABLE src_4(
-  key STRING,
-  value STRING
-)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@src_4
-RUN: Stage-0:DDL
-PREHOOK: query: CREATE TABLE src_5(
-  key STRING,
-  value STRING
-)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@src_5
-POSTHOOK: query: CREATE TABLE src_5(
-  key STRING,
-  value STRING
-)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@src_5
-RUN: Stage-0:DDL
-Warning: Shuffle Join JOIN[31][tables = [sq_2_notin_nullcheck]] in Work 'Reducer 2' is a cross product
-PREHOOK: query: explain
-from src b
-INSERT OVERWRITE TABLE src_4
- select *
- where b.key in
- (select a.key
- from src a
- where b.value = a.value and a.key > '9'
- )
-INSERT OVERWRITE TABLE src_5
- select *
- where b.key not in ( select key from src s1 where s1.key > '2')
- order by key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain
-from src b
-INSERT OVERWRITE TABLE src_4
- select *
- where b.key in
- (select a.key
- from src a
- where b.value = a.value and a.key > '9'
- )
-INSERT OVERWRITE TABLE src_5
- select *
- where b.key not in ( select key from src s1 where s1.key > '2')
- order by key
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
-  Stage-4 depends on stages: Stage-0
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-      Edges:
-        Reducer 2 <- Map 10 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
-        Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2)
-        Reducer 9 <- Map 8 (GROUP, 1)
-        Reducer 4 <- Reducer 3 (SORT, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 10
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    sort order:
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: key (type: string), value (type: string)
-        Map 11
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: string), value (type: string)
-                    sort order: ++
-                    Map-reduce partition columns: key (type: string), value (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-        Map 6
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((key > '9') and value is not null) (type: boolean)
-                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: string), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: string), _col1 (type: string)
-                        mode: hash
-                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string), _col1 (type: string)
-                          sort order: ++
-                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                          Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-        Map 7
-            Map Operator Tree:
-                TableScan
-                  alias: s1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (key > '2') (type: boolean)
-                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-        Map 8
-            Map Operator Tree:
-                TableScan
-                  alias: s1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((key > '2') and key is null) (type: boolean)
-                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          sort order:
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col0 (type: bigint)
-        Reducer 2
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0
-                  1
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: string)
-        Reducer 3
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Left Outer Join0 to 1
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col5
-                Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  predicate: _col5 is null (type: boolean)
-                  Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: string)
-        Reducer 4
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      name: default.src_5
-        Reducer 5
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: string), value (type: string)
-                  1 _col0 (type: string), _col1 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      name: default.src_4
-        Reducer 9
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  predicate: (_col0 = 0) (type: boolean)
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: 0 (type: bigint)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        sort order:
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-
-  Stage: Stage-1
-    Move Operator
-      tables:
-          replace: true
-          table:
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.src_5
-
-  Stage: Stage-3
-    Stats-Aggr Operator
-
-  Stage: Stage-0
-    Move Operator
-      tables:
-          replace: true
-          table:
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.src_4
-
-  Stage: Stage-4
-    Stats-Aggr Operator
-
-Warning: Shuffle Join JOIN[31][tables = [sq_2_notin_nullcheck]] in Work 'Reducer 2' is a cross product
-PREHOOK: query: from src b
-INSERT OVERWRITE TABLE src_4
- select *
- where b.key in
- (select a.key
- from src a
- where b.value = a.value and a.key > '9'
- )
-INSERT OVERWRITE TABLE src_5
- select *
- where b.key not in ( select key from src s1 where s1.key > '2')
- order by key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@src_4
-PREHOOK: Output: default@src_5
-POSTHOOK: query: from src b
-INSERT OVERWRITE TABLE src_4
- select *
- where b.key in
- (select a.key
- from src a
- where b.value = a.value and a.key > '9'
- )
-INSERT OVERWRITE TABLE src_5
- select *
- where b.key not in ( select key from src s1 where s1.key > '2')
- order by key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@src_4
-POSTHOOK: Output: default@src_5
-POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
-RUN: Stage-2:MAPRED
-RUN: Stage-1:MOVE
-RUN: Stage-0:MOVE
-RUN: Stage-3:STATS
-RUN: Stage-4:STATS
-PREHOOK: query: select * from src_4
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_4
-#### A masked pattern was here ####
-POSTHOOK: query: select * from src_4
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_4
-#### A masked pattern was here ####
-90 val_90
-90 val_90
-90 val_90
-92 val_92
-95 val_95
-95 val_95
-96 val_96
-97 val_97
-97 val_97
-98 val_98
-98 val_98
-PREHOOK: query: select * from src_5
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_5
-#### A masked pattern was here ####
-POSTHOOK: query: select * from src_5
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_5
-#### A masked pattern was here ####
-0 val_0
-0 val_0
-0 val_0
-10 val_10
-100 val_100
-100 val_100
-103 val_103
-103 val_103
-104 val_104
-104 val_104
-105 val_105
-11 val_11
-111 val_111
-113 val_113
-113 val_113
-114 val_114
-116 val_116
-118 val_118
-118 val_118
-119 val_119
-119 val_119
-119 val_119
-12 val_12
-12 val_12
-120 val_120
-120 val_120
-125 val_125
-125 val_125
-126 val_126
-128 val_128
-128 val_128
-128 val_128
-129 val_129
-129 val_129
-131 val_131
-133 val_133
-134 val_134
-134 val_134
-136 val_136
-137 val_137
-137 val_137
-138 val_138
-138 val_138
-138 val_138
-138 val_138
-143 val_143
-145 val_145
-146 val_146
-146 val_146
-149 val_149
-149 val_149
-15 val_15
-15 val_15
-150 val_150
-152 val_152
-152 val_152
-153 val_153
-155 val_155
-156 val_156
-157 val_157
-158 val_158
-160 val_160
-162 val_162
-163 val_163
-164 val_164
-164 val_164
-165 val_165
-165 val_165
-166 val_166
-167 val_167
-167 val_167
-167 val_167
-168 val_168
-169 val_169
-169 val_169
-169 val_169
-169 val_169
-17 val_17
-170 val_170
-172 val_172
-172 val_172
-174 val_174
-174 val_174
-175 val_175
-175 val_175
-176 val_176
-176 val_176
-177 val_177
-178 val_178
-179 val_179
-179 val_179
-18 val_18
-18 val_18
-180 val_180
-181 val_181
-183 val_183
-186 val_186
-187 val_187
-187 val_187
-187 val_187
-189 val_189
-19 val_19
-190 val_190
-191 val_191
-191 val_191
-192 val_192
-193 val_193
-193 val_193
-193 val_193
-194 val_194
-195 val_195
-195 val_195
-196 val_196
-197 val_197
-197 val_197
-199 val_199
-199 val_199
-199 val_199
-2 val_2
-Warning: Map Join MAPJOIN[46][bigTable=b] in task 'Stage-2:MAPRED' is a cross product
-PREHOOK: query: explain
-from src b
-INSERT OVERWRITE TABLE src_4
- select *
- where b.key in
- (select a.key
- from src a
- where b.value = a.value and a.key > '9'
- )
-INSERT OVERWRITE TABLE src_5
- select *
- where b.key not in ( select key from src s1 where s1.key > '2')
- order by key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain
-from src b
-INSERT OVERWRITE TABLE src_4
- select *
- where b.key in
- (select a.key
- from src a
- where b.value = a.value and a.key > '9'
- )
-INSERT OVERWRITE TABLE src_5
- select *
- where b.key not in ( select key from src s1 where s1.key > '2')
- order by key
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-2 depends on stages: Stage-5
-  Stage-1 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
-  Stage-4 depends on stages: Stage-0
-
-STAGE PLANS:
-  Stage: Stage-5
-    Spark
-      Edges:
-        Reducer 6 <- Map 5 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((key > '9') and value is not null) (type: boolean)
-                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: string), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: string), _col1 (type: string)
-                        mode: hash
-                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                        Spark HashTable Sink Operator
-                          keys:
-                            0 key (type: string), value (type: string)
-                            1 _col0 (type: string), _col1 (type: string)
-            Local Work:
-              Map Reduce Local Work
-        Map 4
-            Map Operator Tree:
-                TableScan
-                  alias: s1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (key > '2') (type: boolean)
-                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: string)
-                          1 _col0 (type: string)
-            Local Work:
-              Map Reduce Local Work
-        Map 5
-            Map Operator Tree:
-                TableScan
-                  alias: s1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((key > '2') and key is null) (type: boolean)
-                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          sort order:
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col0 (type: bigint)
-        Reducer 6
-            Local Work:
-              Map Reduce Local Work
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  predicate: (_col0 = 0) (type: boolean)
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: 0 (type: bigint)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0
-                          1
-
-  Stage: Stage-2
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Map Join Operator
-                    condition map:
-                         Left Semi Join 0 to 1
-                    keys:
-                      0
-                      1
-                    outputColumnNames: _col0, _col1
-                    input vertices:
-                      1 Reducer 6
-                    Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Outer Join0 to 1
-                      keys:
-                        0 _col0 (type: string)
-                        1 _col0 (type: string)
-                      outputColumnNames: _col0, _col1, _col5
-                      input vertices:
-                        1 Map 4
-                      Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
-                      Filter Operator
-                        predicate: _col5 is null (type: boolean)
-                        Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
-                        Select Operator
-                          expressions: _col0 (type: string), _col1 (type: string)
-                          outputColumnNames: _col0, _col1
-                          Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            key expressions: _col0 (type: string)
-                            sort order: +
-                            Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col1 (type: string)
-                  Map Join Operator
-                    condition map:
-                         Left Semi Join 0 to 1
-                    keys:
-                      0 key (type: string), value (type: string)
-                      1 _col0 (type: string), _col1 (type: string)
-                    outputColumnNames: _col0, _col1
-                    input vertices:
-                      1 Map 3
-                    Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.src_4
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      name: default.src_5
-
-  Stage: Stage-1
-    Move Operator
-      tables:
-          replace: true
-          table:
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.src_5
-
-  Stage: Stage-3
-    Stats-Aggr Operator
-
-  Stage: Stage-0
-    Move Operator
-      tables:
-          replace: true
-          table:
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.src_4
-
-  Stage: Stage-4
-    Stats-Aggr Operator
-
-Warning: Map Join MAPJOIN[46][bigTable=b] in task 'Stage-2:MAPRED' is a cross product
-PREHOOK: query: from src b
-INSERT OVERWRITE TABLE src_4
- select *
- where b.key in
- (select a.key
- from src a
- where b.value = a.value and a.key > '9'
- )
-INSERT OVERWRITE TABLE src_5
- select *
- where b.key not in ( select key from src s1 where s1.key > '2')
- order by key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@src_4
-PREHOOK: Output: default@src_5
-POSTHOOK: query: from src b
-INSERT OVERWRITE TABLE src_4
- select *
- where b.key in
- (select a.key
- from src a
- where b.value = a.value and a.key > '9'
- )
-INSERT OVERWRITE TABLE src_5
- select *
- where b.key not in ( select key from src s1 where s1.key > '2')
- order by key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@src_4
-POSTHOOK: Output: default@src_5
-POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
-RUN: Stage-5:MAPRED
-RUN: Stage-2:MAPRED
-RUN: Stage-1:MOVE
-RUN: Stage-0:MOVE
-RUN: Stage-3:STATS
-RUN: Stage-4:STATS
-PREHOOK: query: select * from src_4
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_4
-#### A masked pattern was here ####
-POSTHOOK: query: select * from src_4
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_4
-#### A masked pattern was here ####
-90 val_90
-90 val_90
-90 val_90
-92 val_92
-95 val_95
-95 val_95
-96 val_96
-97 val_97
-97 val_97
-98 val_98
-98 val_98
-PREHOOK: query: select * from src_5
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_5
-#### A masked pattern was here ####
-POSTHOOK: query: select * from src_5
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_5
-#### A masked pattern was here ####
-0 val_0
-0 val_0
-0 val_0
-10 val_10
-100 val_100
-100 val_100
-103 val_103
-103 val_103
-104 val_104
-104 val_104
-105 val_105
-11 val_11
-111 val_111
-113 val_113
-113 val_113
-114 val_114
-116 val_116
-118 val_118
-118 val_118
-119 val_119
-119 val_119
-119 val_119
-12 val_12
-12 val_12
-120 val_120
-120 val_120
-125 val_125
-125 val_125
-126 val_126
-128 val_128
-128 val_128
-128 val_128
-129 val_129
-129 val_129
-131 val_131
-133 val_133
-134 val_134
-134 val_134
-136 val_136
-137 val_137
-137 val_137
-138 val_138
-138 val_138
-138 val_138
-138 val_138
-143 val_143
-145 val_145
-146 val_146
-146 val_146
-149 val_149
-149 val_149
-15 val_15
-15 val_15
-150 val_150
-152 val_152
-152 val_152
-153 val_153
-155 val_155
-156 val_156
-157 val_157
-158 val_158
-160 val_160
-162 val_162
-163 val_163
-164 val_164
-164 val_164
-165 val_165
-165 val_165
-166 val_166
-167 val_167
-167 val_167
-167 val_167
-168 val_168
-169 val_169
-169 val_169
-169 val_169
-169 val_169
-17 val_17
-170 val_170
-172 val_172
-172 val_172
-174 val_174
-174 val_174
-175 val_175
-175 val_175
-176 val_176
-176 val_176
-177 val_177
-178 val_178
-179 val_179
-179 val_179
-18 val_18
-18 val_18
-180 val_180
-181 val_181
-183 val_183
-186 val_186
-187 val_187
-187 val_187
-187 val_187
-189 val_189
-19 val_19
-190 val_190
-191 val_191
-191 val_191
-192 val_192
-193 val_193
-193 val_193
-193 val_193
-194 val_194
-195 val_195
-195 val_195
-196 val_196
-197 val_197
-197 val_197
-199 val_199
-199 val_199
-199 val_199
-2 val_2
diff --git ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.8.out ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.8.out
deleted file mode 100644
index 1bfdba20995aa48dc2981cefe553170a93431d67..0000000000000000000000000000000000000000
--- ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.8.out
+++ /dev/null
@@ -1,890 +0,0 @@
-PREHOOK: query: -- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-CREATE TABLE src_4(
-  key STRING,
-  value STRING
-)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@src_4
-POSTHOOK: query: -- SORT_QUERY_RESULTS
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-CREATE TABLE src_4(
-  key STRING,
-  value STRING
-)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@src_4
-RUN: Stage-0:DDL
-PREHOOK: query: CREATE TABLE src_5(
-  key STRING,
-  value STRING
-)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@src_5
-POSTHOOK: query: CREATE TABLE src_5(
-  key STRING,
-  value STRING
-)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@src_5
-RUN: Stage-0:DDL
-Warning: Shuffle Join JOIN[31][tables = [sq_2_notin_nullcheck]] in Work 'Reducer 2' is a cross product
-PREHOOK: query: explain
-from src b
-INSERT OVERWRITE TABLE src_4
- select *
- where b.key in
- (select a.key
- from src a
- where b.value = a.value and a.key > '9'
- )
-INSERT OVERWRITE TABLE src_5
- select *
- where b.key not in ( select key from src s1 where s1.key > '2')
- order by key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain
-from src b
-INSERT OVERWRITE TABLE src_4
- select *
- where b.key in
- (select a.key
- from src a
- where b.value = a.value and a.key > '9'
- )
-INSERT OVERWRITE TABLE src_5
- select *
- where b.key not in ( select key from src s1 where s1.key > '2')
- order by key
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
-  Stage-4 depends on stages: Stage-0
-
-STAGE PLANS:
-  Stage: Stage-2
-    Spark
-      Edges:
-        Reducer 2 <- Map 10 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
-        Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2)
-        Reducer 9 <- Map 8 (GROUP, 1)
-        Reducer 4 <- Reducer 3 (SORT, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 10
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    sort order:
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: key (type: string), value (type: string)
-        Map 11
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: string), value (type: string)
-                    sort order: ++
-                    Map-reduce partition columns: key (type: string), value (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-        Map 6
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((key > '9') and value is not null) (type: boolean)
-                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: string), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: string), _col1 (type: string)
-                        mode: hash
-                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string), _col1 (type: string)
-                          sort order: ++
-                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                          Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-        Map 7
-            Map Operator Tree:
-                TableScan
-                  alias: s1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (key > '2') (type: boolean)
-                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-        Map 8
-            Map Operator Tree:
-                TableScan
-                  alias: s1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((key > '2') and key is null) (type: boolean)
-                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          sort order:
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col0 (type: bigint)
-        Reducer 2
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0
-                  1
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: string)
-        Reducer 3
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Left Outer Join0 to 1
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col5
-                Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  predicate: _col5 is null (type: boolean)
-                  Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: string)
-        Reducer 4
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      name: default.src_5
-        Reducer 5
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 key (type: string), value (type: string)
-                  1 _col0 (type: string), _col1 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      name: default.src_4
-        Reducer 9
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  predicate: (_col0 = 0) (type: boolean)
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: 0 (type: bigint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: _col0 (type: bigint)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        sort order:
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-
-  Stage: Stage-1
-    Move Operator
-      tables:
-          replace: true
-          table:
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.src_5
-
-  Stage: Stage-3
-    Stats-Aggr Operator
-
-  Stage: Stage-0
-    Move Operator
-      tables:
-          replace: true
-          table:
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.src_4
-
-  Stage: Stage-4
-    Stats-Aggr Operator
-
-Warning: Shuffle Join JOIN[31][tables = [sq_2_notin_nullcheck]] in Work 'Reducer 2' is a cross product
-PREHOOK: query: from src b
-INSERT OVERWRITE TABLE src_4
- select *
- where b.key in
- (select a.key
- from src a
- where b.value = a.value and a.key > '9'
- )
-INSERT OVERWRITE TABLE src_5
- select *
- where b.key not in ( select key from src s1 where s1.key > '2')
- order by key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@src_4
-PREHOOK: Output: default@src_5
-POSTHOOK: query: from src b
-INSERT OVERWRITE TABLE src_4
- select *
- where b.key in
- (select a.key
- from src a
- where b.value = a.value and a.key > '9'
- )
-INSERT OVERWRITE TABLE src_5
- select *
- where b.key not in ( select key from src s1 where s1.key > '2')
- order by key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@src_4
-POSTHOOK: Output: default@src_5
-POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ]
-RUN: Stage-2:MAPRED
-RUN: Stage-1:MOVE
-RUN: Stage-0:MOVE
-RUN: Stage-3:STATS
-RUN: Stage-4:STATS
-PREHOOK: query: select * from src_4
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_4
-#### A masked pattern was here ####
-POSTHOOK: query: select * from src_4
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_4
-#### A masked pattern was here ####
-90 val_90
-90 val_90
-90 val_90
-92 val_92
-95 val_95
-95 val_95
-96 val_96
-97 val_97
-97 val_97
-98 val_98
-98 val_98
-PREHOOK: query: select * from src_5
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_5
-#### A masked pattern was here ####
-POSTHOOK: query: select * from src_5
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_5
-#### A masked pattern was here ####
-0 val_0
-0 val_0
-0 val_0
-10 val_10
-100 val_100
-100 val_100
-103 val_103
-103 val_103
-104 val_104
-104 val_104
-105 val_105
-11 val_11
-111 val_111
-113 val_113
-113 val_113
-114 val_114
-116 val_116
-118 val_118
-118 val_118
-119 val_119
-119 val_119
-119 val_119
-12 val_12
-12 val_12
-120 val_120
-120 val_120
-125 val_125
-125 val_125
-126 val_126
-128 val_128
-128 val_128
-128 val_128
-129 val_129
-129 val_129
-131 val_131
-133 val_133
-134 val_134
-134 val_134
-136 val_136
-137 val_137
-137 val_137
-138 val_138
-138 val_138
-138 val_138
-138 val_138
-143 val_143
-145 val_145
-146 val_146
-146 val_146
-149 val_149
-149 val_149
-15 val_15
-15 val_15
-150 val_150
-152 val_152
-152 val_152
-153 val_153
-155 val_155
-156 val_156
-157 val_157
-158 val_158
-160 val_160
-162 val_162
-163 val_163
-164 val_164
-164 val_164
-165 val_165
-165 val_165
-166 val_166
-167 val_167
-167 val_167
-167 val_167
-168 val_168
-169 val_169
-169 val_169
-169 val_169
-169 val_169
-17 val_17
-170 val_170
-172 val_172
-172 val_172
-174 val_174
-174 val_174
-175 val_175
-175 val_175
-176 val_176
-176 val_176
-177 val_177
-178 val_178
-179 val_179
-179 val_179
-18 val_18
-18 val_18
-180 val_180
-181 val_181
-183 val_183
-186 val_186
-187 val_187
-187 val_187
-187 val_187
-189 val_189
-19 val_19
-190 val_190
-191 val_191
-191 val_191
-192 val_192
-193 val_193
-193 val_193
-193 val_193
-194 val_194
-195 val_195
-195 val_195
-196 val_196
-197 val_197
-197 val_197
-199 val_199
-199 val_199
-199 val_199
-2 val_2
-Warning: Map Join MAPJOIN[46][bigTable=b] in task 'Stage-2:MAPRED' is a cross product
-PREHOOK: query: explain
-from src b
-INSERT OVERWRITE TABLE src_4
- select *
- where b.key in
- (select a.key
- from src a
- where b.value = a.value and a.key > '9'
- )
-INSERT OVERWRITE TABLE src_5
- select *
- where b.key not in ( select key from src s1 where s1.key > '2')
- order by key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain
-from src b
-INSERT OVERWRITE TABLE src_4
- select *
- where b.key in
- (select a.key
- from src a
- where b.value = a.value and a.key > '9'
- )
-INSERT OVERWRITE TABLE src_5
- select *
- where b.key not in ( select key from src s1 where s1.key > '2')
- order by key
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-2 depends on stages: Stage-5
-  Stage-1 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
-  Stage-4 depends on stages: Stage-0
-
-STAGE PLANS:
-  Stage: Stage-5
-    Spark
-      Edges:
-        Reducer 6 <- Map 5 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((key > '9') and value is not null) (type: boolean)
-                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: string), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: string), _col1 (type: string)
-                        mode: hash
-                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                        Spark HashTable Sink Operator
-                          keys:
-                            0 key (type: string), value (type: string)
-                            1 _col0 (type: string), _col1 (type: string)
-            Local Work:
-              Map Reduce Local Work
-        Map 4
-            Map Operator Tree:
-                TableScan
-                  alias: s1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (key > '2') (type: boolean)
-                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: string)
-                          1 _col0 (type: string)
-            Local Work:
-              Map Reduce Local Work
-        Map 5
-            Map Operator Tree:
-                TableScan
-                  alias: s1
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: ((key > '2') and key is null) (type: boolean)
-                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        aggregations: count()
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          sort order:
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col0 (type: bigint)
-        Reducer 6
-            Local Work:
-              Map Reduce Local Work
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  predicate: (_col0 = 0) (type: boolean)
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: 0 (type: bigint)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: _col0 (type: bigint)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0
-                          1
-
-  Stage: Stage-2
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Map Join Operator
-                    condition map:
-                         Left Semi Join 0 to 1
-                    keys:
-                      0
-                      1
-                    outputColumnNames: _col0, _col1
-                    input vertices:
-                      1 Reducer 6
Reducer 6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5 - input vertices: - 1 Map 4 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col5 is null (type: boolean) - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: string), value (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - Local Work: - Map Reduce Local Work - Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 - - Stage: Stage-3 - Stats-Aggr Operator - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - - Stage: Stage-4 - Stats-Aggr Operator - -Warning: Map Join MAPJOIN[46][bigTable=b] in task 'Stage-2:MAPRED' is a cross product -PREHOOK: query: from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@src_4 -PREHOOK: Output: default@src_5 -POSTHOOK: query: from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - 
select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@src_4 -POSTHOOK: Output: default@src_5 -POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-5:MAPRED -RUN: Stage-2:MAPRED -RUN: Stage-1:MOVE -RUN: Stage-0:MOVE -RUN: Stage-3:STATS -RUN: Stage-4:STATS -PREHOOK: query: select * from src_4 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_4 -#### A masked pattern was here #### -POSTHOOK: query: select * from src_4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_4 -#### A masked pattern was here #### -90 val_90 -90 val_90 -90 val_90 -92 val_92 -95 val_95 -95 val_95 -96 val_96 -97 val_97 -97 val_97 -98 val_98 -98 val_98 -PREHOOK: query: select * from src_5 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_5 -#### A masked pattern was here #### -POSTHOOK: query: select * from src_5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_5 -#### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -10 val_10 -100 val_100 -100 val_100 -103 val_103 -103 val_103 -104 val_104 -104 val_104 -105 val_105 -11 val_11 -111 val_111 -113 val_113 -113 val_113 -114 val_114 -116 val_116 -118 val_118 -118 val_118 -119 val_119 -119 val_119 -119 val_119 -12 val_12 -12 val_12 -120 val_120 -120 val_120 -125 val_125 -125 val_125 -126 val_126 -128 val_128 -128 val_128 -128 val_128 -129 val_129 -129 val_129 -131 val_131 -133 val_133 -134 val_134 -134 val_134 -136 val_136 -137 val_137 -137 val_137 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -143 val_143 -145 val_145 -146 val_146 -146 val_146 -149 val_149 -149 val_149 -15 val_15 -15 val_15 -150 val_150 -152 val_152 -152 val_152 -153 val_153 -155 val_155 -156 val_156 -157 val_157 -158 val_158 -160 val_160 -162 val_162 -163 val_163 -164 val_164 -164 val_164 -165 val_165 -165 val_165 -166 val_166 -167 val_167 -167 val_167 -167 val_167 -168 val_168 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -17 val_17 -170 val_170 -172 val_172 -172 val_172 -174 val_174 -174 val_174 -175 val_175 -175 val_175 -176 val_176 -176 val_176 -177 val_177 -178 val_178 -179 val_179 -179 val_179 -18 val_18 -18 val_18 -180 val_180 -181 val_181 -183 val_183 -186 val_186 -187 val_187 -187 val_187 -187 val_187 -189 val_189 -19 val_19 -190 val_190 -191 val_191 -191 val_191 -192 val_192 -193 val_193 -193 val_193 -193 val_193 -194 val_194 -195 val_195 -195 val_195 -196 val_196 -197 val_197 -197 val_197 -199 val_199 -199 val_199 -199 val_199 -2 val_2 diff --git ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out index 04dd9b4630d0039620222da2907e2d51fa42784d..d6df85a8c2b0a1d8c8bfaeac40ee6ce7e4c51e16 100644 --- ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out +++ ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out @@ -73,8 +73,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 10 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 1), Map 6 
(PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4) + Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 4), Map 6 (PARTITION-LEVEL SORT, 4) Reducer 9 <- Map 8 (GROUP, 1) Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### @@ -105,21 +105,21 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan @@ -235,19 +235,17 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: bigint) + keys: 0 (type: bigint) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Stage: Stage-1 Move Operator @@ -308,10 +306,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@src_4 POSTHOOK: Output: default@src_5 -POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, 
type:string, comment:default), ] RUN: Stage-2:MAPRED RUN: Stage-1:MOVE RUN: Stage-0:MOVE @@ -514,16 +512,16 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 key (type: string), value (type: string) @@ -578,16 +576,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: bigint) + keys: 0 (type: bigint) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 @@ -730,10 +726,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@src_4 POSTHOOK: Output: default@src_5 -POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] RUN: Stage-5:MAPRED RUN: Stage-2:MAPRED RUN: Stage-1:MOVE diff --git ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.7.out ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.7.out deleted file mode 100644 index 86b7544fc853b17d373b8f8b9167a88a2e92eccd..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.7.out +++ /dev/null @@ -1,217 +0,0 @@ -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -POSTHOOK: type: DROPTABLE 
-PREHOOK: query: DROP TABLE over1korc -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE over1korc -POSTHOOK: type: DROPTABLE -PREHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1k -POSTHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over1k -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@over1k -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@over1k -PREHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1korc -POSTHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over1korc -PREHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -PREHOOK: type: QUERY -PREHOOK: Input: default@over1k -PREHOOK: Output: default@over1korc -POSTHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1k -POSTHOOK: Output: default@over1korc -POSTHOOK: Lineage: over1korc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ] -POSTHOOK: Lineage: over1korc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ] -POSTHOOK: Lineage: over1korc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ] -POSTHOOK: Lineage: over1korc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] -POSTHOOK: Lineage: over1korc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] -POSTHOOK: Lineage: over1korc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ] -POSTHOOK: Lineage: over1korc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ] -POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ] -POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] -POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] -POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -PREHOOK: 
type: QUERY -POSTHOOK: query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Reducer 2 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(50), avg(50.0), avg(50) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) - Reducer 3 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@over1korc -#### A masked pattern was here #### -POSTHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1korc -#### A masked pattern was here #### -65536 50.0 50.0 50.0000 -65537 50.0 50.0 50.0000 -65538 50.0 50.0 50.0000 
-65539 50.0 50.0 50.0000 -65540 50.0 50.0 50.0000 -65541 50.0 50.0 50.0000 -65542 50.0 50.0 50.0000 -65543 50.0 50.0 50.0000 -65544 50.0 50.0 50.0000 -65545 50.0 50.0 50.0000 diff --git ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.8.out ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.8.out deleted file mode 100644 index 69f475440b8aabba9bc7005e6029de6565680967..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/spark/vector_cast_constant.q.java1.8.out +++ /dev/null @@ -1,203 +0,0 @@ -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE over1korc -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE over1korc -POSTHOOK: type: DROPTABLE -PREHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1k -POSTHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over1k -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@over1k -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@over1k -PREHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1korc -POSTHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over1korc -PREHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -PREHOOK: type: QUERY -PREHOOK: Input: default@over1k -PREHOOK: Output: default@over1korc -POSTHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1k -POSTHOOK: Output: default@over1korc -POSTHOOK: Lineage: over1korc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ] -POSTHOOK: Lineage: over1korc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ] -POSTHOOK: Lineage: over1korc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ] -POSTHOOK: Lineage: over1korc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] -POSTHOOK: Lineage: over1korc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] -POSTHOOK: Lineage: over1korc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ] 
-POSTHOOK: Lineage: over1korc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ] -POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ] -POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] -POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] -POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 2) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(50), avg(50.0), avg(50) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@over1korc -#### A masked pattern was here #### -POSTHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1korc -#### A masked pattern was here #### -65636 50.0 50.0 50 -65550 50.0 50.0 50 -65592 
50.0 50.0 50 -65744 50.0 50.0 50 -65722 50.0 50.0 50 -65668 50.0 50.0 50 -65598 50.0 50.0 50 -65596 50.0 50.0 50 -65568 50.0 50.0 50 -65738 50.0 50.0 50 diff --git ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out index 63cdc2437ea5735d8b285ed52f6836fb6a45f4d9..0459d93162a7bc07f0d12ec0fd4cc839422657ac 100644 --- ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out @@ -102,14 +102,14 @@ PREHOOK: query: EXPLAIN SELECT AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -119,7 +119,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 4) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -129,11 +130,11 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i (type: int) - outputColumnNames: i + outputColumnNames: _col0 Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(50), avg(UDFToDouble(50)), avg(CAST( 50 AS decimal(10,0))) - keys: i (type: int) + aggregations: avg(50), avg(50.0), avg(50) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE @@ -152,6 +153,19 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) + Reducer 3 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE @@ -159,8 +173,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -174,7 +188,7 @@ PREHOOK: query: SELECT AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) 
AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@over1korc #### A masked pattern was here #### @@ -183,17 +197,17 @@ POSTHOOK: query: SELECT AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@over1korc #### A masked pattern was here #### -65598 50.0 50.0 50 -65694 50.0 50.0 50 -65678 50.0 50.0 50 -65684 50.0 50.0 50 -65596 50.0 50.0 50 -65692 50.0 50.0 50 -65630 50.0 50.0 50 -65674 50.0 50.0 50 -65628 50.0 50.0 50 -65776 50.0 50.0 50 +65536 50.0 50.0 50.0000 +65537 50.0 50.0 50.0000 +65538 50.0 50.0 50.0000 +65539 50.0 50.0 50.0000 +65540 50.0 50.0 50.0000 +65541 50.0 50.0 50.0000 +65542 50.0 50.0 50.0000 +65543 50.0 50.0 50.0000 +65544 50.0 50.0 50.0000 +65545 50.0 50.0 50.0000 diff --git ql/src/test/results/clientpositive/stats_list_bucket.q.java1.7.out ql/src/test/results/clientpositive/stats_list_bucket.q.java1.7.out deleted file mode 100644 index a4908bcf7912cc3fedbe15f6bfd5f3581fae3b16..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/stats_list_bucket.q.java1.7.out +++ /dev/null @@ -1,191 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- JAVA_VERSION_SPECIFIC_OUTPUT - -drop table stats_list_bucket -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- JAVA_VERSION_SPECIFIC_OUTPUT - -drop table stats_list_bucket -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table stats_list_bucket_1 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table stats_list_bucket_1 -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table stats_list_bucket ( - c1 string, - c2 string -) partitioned by (ds string, hr string) -skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) -stored as directories -stored as rcfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@stats_list_bucket -POSTHOOK: query: create table stats_list_bucket ( - c1 string, - c2 string -) partitioned by (ds string, hr string) -skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) -stored as directories -stored as rcfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@stats_list_bucket -PREHOOK: query: -- Try partitioned table with list bucketing. --- The stats should show 500 rows loaded, as many rows as the src table has. - -insert overwrite table stats_list_bucket partition (ds = '2008-04-08', hr = '11') - select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@stats_list_bucket@ds=2008-04-08/hr=11 -POSTHOOK: query: -- Try partitioned table with list bucketing. --- The stats should show 500 rows loaded, as many rows as the src table has. 
- -insert overwrite table stats_list_bucket partition (ds = '2008-04-08', hr = '11') - select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@stats_list_bucket@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: stats_list_bucket PARTITION(ds=2008-04-08,hr=11).c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: stats_list_bucket PARTITION(ds=2008-04-08,hr=11).c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: desc formatted stats_list_bucket partition (ds = '2008-04-08', hr = '11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@stats_list_bucket -POSTHOOK: query: desc formatted stats_list_bucket partition (ds = '2008-04-08', hr = '11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@stats_list_bucket -# col_name data_type comment - -c1 string -c2 string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: stats_list_bucket -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 - numRows 500 - rawDataSize 4812 - totalSize 5522 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [c1, c2] -Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[82, val_82]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=82/c2=val_82, [466, val_466]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=466/c2=val_466, [287, val_287]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=287/c2=val_287} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: -- Also try non-partitioned table with list bucketing. --- Stats should show the same number of rows. - -create table stats_list_bucket_1 ( - c1 string, - c2 string -) -skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) -stored as directories -stored as rcfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@stats_list_bucket_1 -POSTHOOK: query: -- Also try non-partitioned table with list bucketing. --- Stats should show the same number of rows. 
- -create table stats_list_bucket_1 ( - c1 string, - c2 string -) -skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) -stored as directories -stored as rcfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@stats_list_bucket_1 -PREHOOK: query: insert overwrite table stats_list_bucket_1 - select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@stats_list_bucket_1 -POSTHOOK: query: insert overwrite table stats_list_bucket_1 - select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@stats_list_bucket_1 -POSTHOOK: Lineage: stats_list_bucket_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: stats_list_bucket_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: desc formatted stats_list_bucket_1 -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@stats_list_bucket_1 -POSTHOOK: query: desc formatted stats_list_bucket_1 -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@stats_list_bucket_1 -# col_name data_type comment - -c1 string -c2 string - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 - numRows 500 - rawDataSize 4812 - totalSize 5522 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [c1, c2] -Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[82, val_82]=/stats_list_bucket_1/c1=82/c2=val_82, [466, val_466]=/stats_list_bucket_1/c1=466/c2=val_466, [287, val_287]=/stats_list_bucket_1/c1=287/c2=val_287} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table stats_list_bucket -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@stats_list_bucket -PREHOOK: Output: default@stats_list_bucket -POSTHOOK: query: drop table stats_list_bucket -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@stats_list_bucket -POSTHOOK: Output: default@stats_list_bucket -PREHOOK: query: drop table stats_list_bucket_1 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@stats_list_bucket_1 -PREHOOK: Output: default@stats_list_bucket_1 -POSTHOOK: query: drop table stats_list_bucket_1 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@stats_list_bucket_1 -POSTHOOK: Output: default@stats_list_bucket_1 diff --git ql/src/test/results/clientpositive/stats_list_bucket.q.java1.8.out ql/src/test/results/clientpositive/stats_list_bucket.q.java1.8.out deleted file mode 100644 index 8688cee49a966232014877bbd569833d7a595266..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/stats_list_bucket.q.java1.8.out +++ /dev/null @@ -1,193 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- JAVA_VERSION_SPECIFIC_OUTPUT - -drop table stats_list_bucket -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) --- JAVA_VERSION_SPECIFIC_OUTPUT - -drop table stats_list_bucket -POSTHOOK: type: DROPTABLE -PREHOOK: 
query: drop table stats_list_bucket_1 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table stats_list_bucket_1 -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table stats_list_bucket ( - c1 string, - c2 string -) partitioned by (ds string, hr string) -skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) -stored as directories -stored as rcfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@stats_list_bucket -POSTHOOK: query: create table stats_list_bucket ( - c1 string, - c2 string -) partitioned by (ds string, hr string) -skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) -stored as directories -stored as rcfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@stats_list_bucket -PREHOOK: query: -- Make sure we use hashed IDs during stats publishing. --- Try partitioned table with list bucketing. --- The stats should show 500 rows loaded, as many rows as the src table has. - -insert overwrite table stats_list_bucket partition (ds = '2008-04-08', hr = '11') - select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@stats_list_bucket@ds=2008-04-08/hr=11 -POSTHOOK: query: -- Make sure we use hashed IDs during stats publishing. --- Try partitioned table with list bucketing. --- The stats should show 500 rows loaded, as many rows as the src table has. - -insert overwrite table stats_list_bucket partition (ds = '2008-04-08', hr = '11') - select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@stats_list_bucket@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: stats_list_bucket PARTITION(ds=2008-04-08,hr=11).c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: stats_list_bucket PARTITION(ds=2008-04-08,hr=11).c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: desc formatted stats_list_bucket partition (ds = '2008-04-08', hr = '11') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@stats_list_bucket -POSTHOOK: query: desc formatted stats_list_bucket partition (ds = '2008-04-08', hr = '11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@stats_list_bucket -# col_name data_type comment - -c1 string -c2 string - -# Partition Information -# col_name data_type comment - -ds string -hr string - -# Detailed Partition Information -Partition Value: [2008-04-08, 11] -Database: default -Table: stats_list_bucket -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE true - numFiles 4 - numRows 500 - rawDataSize 4812 - totalSize 5522 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [c1, c2] -Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[466, val_466]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=466/c2=val_466, [287, val_287]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=287/c2=val_287, [82, val_82]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=82/c2=val_82} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: -- Also try non-partitioned table with list bucketing. 
--- Stats should show the same number of rows. - -create table stats_list_bucket_1 ( - c1 string, - c2 string -) -skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) -stored as directories -stored as rcfile -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@stats_list_bucket_1 -POSTHOOK: query: -- Also try non-partitioned table with list bucketing. --- Stats should show the same number of rows. - -create table stats_list_bucket_1 ( - c1 string, - c2 string -) -skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) -stored as directories -stored as rcfile -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@stats_list_bucket_1 -PREHOOK: query: insert overwrite table stats_list_bucket_1 - select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@stats_list_bucket_1 -POSTHOOK: query: insert overwrite table stats_list_bucket_1 - select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@stats_list_bucket_1 -POSTHOOK: Lineage: stats_list_bucket_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: stats_list_bucket_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: desc formatted stats_list_bucket_1 -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@stats_list_bucket_1 -POSTHOOK: query: desc formatted stats_list_bucket_1 -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@stats_list_bucket_1 -# col_name data_type comment - -c1 string -c2 string - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: MANAGED_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE true - numFiles 4 - numRows 500 - rawDataSize 4812 - totalSize 5522 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe -InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat -Compressed: No -Num Buckets: -1 -Bucket Columns: [] -Sort Columns: [] -Stored As SubDirectories: Yes -Skewed Columns: [c1, c2] -Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] -#### A masked pattern was here #### -Skewed Value to Truncated Path: {[466, val_466]=/stats_list_bucket_1/c1=466/c2=val_466, [82, val_82]=/stats_list_bucket_1/c1=82/c2=val_82, [287, val_287]=/stats_list_bucket_1/c1=287/c2=val_287} -Storage Desc Params: - serialization.format 1 -PREHOOK: query: drop table stats_list_bucket -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@stats_list_bucket -PREHOOK: Output: default@stats_list_bucket -POSTHOOK: query: drop table stats_list_bucket -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@stats_list_bucket -POSTHOOK: Output: default@stats_list_bucket -PREHOOK: query: drop table stats_list_bucket_1 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@stats_list_bucket_1 -PREHOOK: Output: default@stats_list_bucket_1 -POSTHOOK: query: drop table stats_list_bucket_1 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@stats_list_bucket_1 -POSTHOOK: Output: default@stats_list_bucket_1 diff --git ql/src/test/results/clientpositive/stats_list_bucket.q.out ql/src/test/results/clientpositive/stats_list_bucket.q.out new file mode 100644 index 
0000000000000000000000000000000000000000..c34c414a1172932b9bfbe35f8cf4c3be40151089 --- /dev/null +++ ql/src/test/results/clientpositive/stats_list_bucket.q.out @@ -0,0 +1,189 @@ +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) + +drop table stats_list_bucket +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) + +drop table stats_list_bucket +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table stats_list_bucket_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table stats_list_bucket_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table stats_list_bucket ( + c1 string, + c2 string +) partitioned by (ds string, hr string) +skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) +stored as directories +stored as rcfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@stats_list_bucket +POSTHOOK: query: create table stats_list_bucket ( + c1 string, + c2 string +) partitioned by (ds string, hr string) +skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) +stored as directories +stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@stats_list_bucket +PREHOOK: query: -- Try partitioned table with list bucketing. +-- The stats should show 500 rows loaded, as many rows as the src table has. + +insert overwrite table stats_list_bucket partition (ds = '2008-04-08', hr = '11') + select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@stats_list_bucket@ds=2008-04-08/hr=11 +POSTHOOK: query: -- Try partitioned table with list bucketing. +-- The stats should show 500 rows loaded, as many rows as the src table has. + +insert overwrite table stats_list_bucket partition (ds = '2008-04-08', hr = '11') + select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@stats_list_bucket@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: stats_list_bucket PARTITION(ds=2008-04-08,hr=11).c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: stats_list_bucket PARTITION(ds=2008-04-08,hr=11).c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: desc formatted stats_list_bucket partition (ds = '2008-04-08', hr = '11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_list_bucket +POSTHOOK: query: desc formatted stats_list_bucket partition (ds = '2008-04-08', hr = '11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_list_bucket +# col_name data_type comment + +c1 string +c2 string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: stats_list_bucket +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 4 + numRows 500 + rawDataSize 4812 + totalSize 5522 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [c1, c2] +Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[466, 
val_466]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=466/c2=val_466, [82, val_82]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=82/c2=val_82, [287, val_287]=/stats_list_bucket/ds=2008-04-08/hr=11/c1=287/c2=val_287} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- Also try non-partitioned table with list bucketing. +-- Stats should show the same number of rows. + +create table stats_list_bucket_1 ( + c1 string, + c2 string +) +skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) +stored as directories +stored as rcfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@stats_list_bucket_1 +POSTHOOK: query: -- Also try non-partitioned table with list bucketing. +-- Stats should show the same number of rows. + +create table stats_list_bucket_1 ( + c1 string, + c2 string +) +skewed by (c1, c2) on (('466','val_466'),('287','val_287'),('82','val_82')) +stored as directories +stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@stats_list_bucket_1 +PREHOOK: query: insert overwrite table stats_list_bucket_1 + select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@stats_list_bucket_1 +POSTHOOK: query: insert overwrite table stats_list_bucket_1 + select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@stats_list_bucket_1 +POSTHOOK: Lineage: stats_list_bucket_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: stats_list_bucket_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: desc formatted stats_list_bucket_1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_list_bucket_1 +POSTHOOK: query: desc formatted stats_list_bucket_1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_list_bucket_1 +# col_name data_type comment + +c1 string +c2 string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 4 + numRows 500 + rawDataSize 4812 + totalSize 5522 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [c1, c2] +Skewed Values: [[466, val_466], [287, val_287], [82, val_82]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[466, val_466]=/stats_list_bucket_1/c1=466/c2=val_466, [287, val_287]=/stats_list_bucket_1/c1=287/c2=val_287, [82, val_82]=/stats_list_bucket_1/c1=82/c2=val_82} +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table stats_list_bucket +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@stats_list_bucket +PREHOOK: Output: default@stats_list_bucket +POSTHOOK: query: drop table stats_list_bucket +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@stats_list_bucket +POSTHOOK: Output: default@stats_list_bucket +PREHOOK: query: drop table stats_list_bucket_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@stats_list_bucket_1 +PREHOOK: Output: default@stats_list_bucket_1 +POSTHOOK: query: drop table stats_list_bucket_1 
+POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@stats_list_bucket_1 +POSTHOOK: Output: default@stats_list_bucket_1 diff --git ql/src/test/results/clientpositive/str_to_map.q.java1.7.out ql/src/test/results/clientpositive/str_to_map.q.java1.7.out deleted file mode 100644 index 652acbb67755437f519b5e174d8425ca7d1c65ae..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/str_to_map.q.java1.7.out +++ /dev/null @@ -1,220 +0,0 @@ -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -desc function str_to_map -PREHOOK: type: DESCFUNCTION -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -desc function str_to_map -POSTHOOK: type: DESCFUNCTION -str_to_map(text, delimiter1, delimiter2) - Creates a map by parsing text -PREHOOK: query: desc function extended str_to_map -PREHOOK: type: DESCFUNCTION -POSTHOOK: query: desc function extended str_to_map -POSTHOOK: type: DESCFUNCTION -str_to_map(text, delimiter1, delimiter2) - Creates a map by parsing text -Split text into key-value pairs using two delimiters. The first delimiter seperates pairs, and the second delimiter sperates key and value. If only one parameter is given, default delimiters are used: ',' as delimiter1 and '=' as delimiter2. -PREHOOK: query: explain select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3 -PREHOOK: type: QUERY -POSTHOOK: query: explain select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 3 - Processor Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: str_to_map('a=1,b=2,c=3',',','=')['a'] (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE - ListSink - -PREHOOK: query: select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -1 -1 -1 -PREHOOK: query: explain select str_to_map('a:1,b:2,c:3') from src limit 3 -PREHOOK: type: QUERY -POSTHOOK: query: explain select str_to_map('a:1,b:2,c:3') from src limit 3 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 3 - Processor Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: str_to_map('a:1,b:2,c:3') (type: map) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 377000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 2262 Basic stats: COMPLETE Column stats: COMPLETE - ListSink - -PREHOOK: query: select str_to_map('a:1,b:2,c:3') from src limit 3 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select str_to_map('a:1,b:2,c:3') from src limit 3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -{"b":"2","a":"1","c":"3"} -{"b":"2","a":"1","c":"3"} -{"b":"2","a":"1","c":"3"} -PREHOOK: query: explain select 
str_to_map('a:1,b:2,c:3',',',':') from src limit 3 -PREHOOK: type: QUERY -POSTHOOK: query: explain select str_to_map('a:1,b:2,c:3',',',':') from src limit 3 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 3 - Processor Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: str_to_map('a:1,b:2,c:3',',',':') (type: map) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 377000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 2262 Basic stats: COMPLETE Column stats: COMPLETE - ListSink - -PREHOOK: query: select str_to_map('a:1,b:2,c:3',',',':') from src limit 3 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select str_to_map('a:1,b:2,c:3',',',':') from src limit 3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -{"b":"2","a":"1","c":"3"} -{"b":"2","a":"1","c":"3"} -{"b":"2","a":"1","c":"3"} -PREHOOK: query: explain select str_to_map(t.ss,',',':')['a'] -from (select transform('a:1,b:2,c:3') using 'cat' as (ss) from src) t -limit 3 -PREHOOK: type: QUERY -POSTHOOK: query: explain select str_to_map(t.ss,',',':')['a'] -from (select transform('a:1,b:2,c:3') using 'cat' as (ss) from src) t -limit 3 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'a:1,b:2,c:3' (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Transform Operator - command: cat - output info: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: str_to_map(_col0,',',':')['a'] (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 3 - Processor Tree: - ListSink - -PREHOOK: query: select str_to_map(t.ss,',',':')['a'] -from (select transform('a:1,b:2,c:3') using 'cat' as (ss) from src) t -limit 3 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select str_to_map(t.ss,',',':')['a'] -from (select transform('a:1,b:2,c:3') using 'cat' as (ss) from src) t -limit 3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -1 -1 -1 -PREHOOK: query: drop table tbl_s2m -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table tbl_s2m -POSTHOOK: 
type: DROPTABLE -PREHOOK: query: create table tbl_s2m as select 'ABC=CC_333=444' as t from src tablesample (3 rows) -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@src -PREHOOK: Output: database:default -PREHOOK: Output: default@tbl_s2m -POSTHOOK: query: create table tbl_s2m as select 'ABC=CC_333=444' as t from src tablesample (3 rows) -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@src -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tbl_s2m -POSTHOOK: Lineage: tbl_s2m.t SIMPLE [] -PREHOOK: query: select str_to_map(t,'_','=')['333'] from tbl_s2m -PREHOOK: type: QUERY -PREHOOK: Input: default@tbl_s2m -#### A masked pattern was here #### -POSTHOOK: query: select str_to_map(t,'_','=')['333'] from tbl_s2m -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl_s2m -#### A masked pattern was here #### -444 -444 -444 -PREHOOK: query: drop table tbl_s2m -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@tbl_s2m -PREHOOK: Output: default@tbl_s2m -POSTHOOK: query: drop table tbl_s2m -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@tbl_s2m -POSTHOOK: Output: default@tbl_s2m diff --git ql/src/test/results/clientpositive/str_to_map.q.java1.8.out ql/src/test/results/clientpositive/str_to_map.q.java1.8.out deleted file mode 100644 index 23b0cbb33eb6bec217f79fd24ec6473726b55cac..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/str_to_map.q.java1.8.out +++ /dev/null @@ -1,219 +0,0 @@ -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -desc function str_to_map -PREHOOK: type: DESCFUNCTION -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -desc function str_to_map -POSTHOOK: type: DESCFUNCTION -str_to_map(text, delimiter1, delimiter2) - Creates a map by parsing text -PREHOOK: query: desc function extended str_to_map -PREHOOK: type: DESCFUNCTION -POSTHOOK: query: desc function extended str_to_map -POSTHOOK: type: DESCFUNCTION -str_to_map(text, delimiter1, delimiter2) - Creates a map by parsing text -Split text into key-value pairs using two delimiters. The first delimiter seperates pairs, and the second delimiter sperates key and value. If only one parameter is given, default delimiters are used: ',' as delimiter1 and '=' as delimiter2. 
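(A minimal usage sketch of the str_to_map behaviour documented and exercised in this fixture; illustrative only, not part of the golden output. The expected values mirror the query results recorded above.)

select str_to_map('a=1,b=2,c=3', ',', '=')['a'];   -- '1': ',' separates pairs, '=' separates key from value
select str_to_map('a:1,b:2,c:3', ',', ':')['b'];   -- '2': same text, explicit ':' as the key-value delimiter
select str_to_map('a:1,b:2,c:3');                  -- {"a":"1","b":"2","c":"3"}, the default-delimiter case run above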
-PREHOOK: query: explain select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3 -PREHOOK: type: QUERY -POSTHOOK: query: explain select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 3 - Processor Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: str_to_map('a=1,b=2,c=3',',','=')['a'] (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - ListSink - -PREHOOK: query: select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -1 -1 -1 -PREHOOK: query: explain select str_to_map('a:1,b:2,c:3') from src limit 3 -PREHOOK: type: QUERY -POSTHOOK: query: explain select str_to_map('a:1,b:2,c:3') from src limit 3 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 3 - Processor Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: str_to_map('a:1,b:2,c:3') (type: map) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 460000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 2760 Basic stats: COMPLETE Column stats: COMPLETE - ListSink - -PREHOOK: query: select str_to_map('a:1,b:2,c:3') from src limit 3 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select str_to_map('a:1,b:2,c:3') from src limit 3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -{"a":"1","b":"2","c":"3"} -{"a":"1","b":"2","c":"3"} -{"a":"1","b":"2","c":"3"} -PREHOOK: query: explain select str_to_map('a:1,b:2,c:3',',',':') from src limit 3 -PREHOOK: type: QUERY -POSTHOOK: query: explain select str_to_map('a:1,b:2,c:3',',',':') from src limit 3 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-0 is a root stage - -STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: 3 - Processor Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: str_to_map('a:1,b:2,c:3',',',':') (type: map) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 460000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 2760 Basic stats: COMPLETE Column stats: COMPLETE - ListSink - -PREHOOK: query: select str_to_map('a:1,b:2,c:3',',',':') from src limit 3 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select str_to_map('a:1,b:2,c:3',',',':') from src limit 3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -{"a":"1","b":"2","c":"3"} -{"a":"1","b":"2","c":"3"} -{"a":"1","b":"2","c":"3"} -PREHOOK: query: explain select str_to_map(t.ss,',',':')['a'] -from (select 
transform('a:1,b:2,c:3') using 'cat' as (ss) from src) t -limit 3 -PREHOOK: type: QUERY -POSTHOOK: query: explain select str_to_map(t.ss,',',':')['a'] -from (select transform('a:1,b:2,c:3') using 'cat' as (ss) from src) t -limit 3 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'a:1,b:2,c:3' (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Transform Operator - command: cat - output info: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: str_to_map(_col0,',',':')['a'] (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 3 - Processor Tree: - ListSink - -PREHOOK: query: select str_to_map(t.ss,',',':')['a'] -from (select transform('a:1,b:2,c:3') using 'cat' as (ss) from src) t -limit 3 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select str_to_map(t.ss,',',':')['a'] -from (select transform('a:1,b:2,c:3') using 'cat' as (ss) from src) t -limit 3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -1 -1 -1 -PREHOOK: query: drop table tbl_s2m -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table tbl_s2m -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table tbl_s2m as select 'ABC=CC_333=444' as t from src tablesample (3 rows) -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@src -PREHOOK: Output: database:default -PREHOOK: Output: default@tbl_s2m -POSTHOOK: query: create table tbl_s2m as select 'ABC=CC_333=444' as t from src tablesample (3 rows) -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@src -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tbl_s2m -PREHOOK: query: select str_to_map(t,'_','=')['333'] from tbl_s2m -PREHOOK: type: QUERY -PREHOOK: Input: default@tbl_s2m -#### A masked pattern was here #### -POSTHOOK: query: select str_to_map(t,'_','=')['333'] from tbl_s2m -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl_s2m -#### A masked pattern was here #### -444 -444 -444 -PREHOOK: query: drop table tbl_s2m -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@tbl_s2m -PREHOOK: Output: default@tbl_s2m -POSTHOOK: query: drop table tbl_s2m -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@tbl_s2m -POSTHOOK: Output: default@tbl_s2m diff --git ql/src/test/results/clientpositive/str_to_map.q.out ql/src/test/results/clientpositive/str_to_map.q.out new file mode 100644 index 
0000000000000000000000000000000000000000..30c98db441459dba3efe32dc98cd33c55d694083 --- /dev/null +++ ql/src/test/results/clientpositive/str_to_map.q.out @@ -0,0 +1,216 @@ +PREHOOK: query: desc function str_to_map +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: desc function str_to_map +POSTHOOK: type: DESCFUNCTION +str_to_map(text, delimiter1, delimiter2) - Creates a map by parsing text +PREHOOK: query: desc function extended str_to_map +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: desc function extended str_to_map +POSTHOOK: type: DESCFUNCTION +str_to_map(text, delimiter1, delimiter2) - Creates a map by parsing text +Split text into key-value pairs using two delimiters. The first delimiter seperates pairs, and the second delimiter sperates key and value. If only one parameter is given, default delimiters are used: ',' as delimiter1 and '=' as delimiter2. +PREHOOK: query: explain select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 3 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: str_to_map('a=1,b=2,c=3',',','=')['a'] (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select str_to_map('a=1,b=2,c=3',',','=')['a'] from src limit 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1 +1 +1 +PREHOOK: query: explain select str_to_map('a:1,b:2,c:3') from src limit 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select str_to_map('a:1,b:2,c:3') from src limit 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 3 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: str_to_map('a:1,b:2,c:3') (type: map) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 377000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 2262 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: select str_to_map('a:1,b:2,c:3') from src limit 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select str_to_map('a:1,b:2,c:3') from src limit 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +{"a":"1","b":"2","c":"3"} +{"a":"1","b":"2","c":"3"} +{"a":"1","b":"2","c":"3"} +PREHOOK: query: explain select str_to_map('a:1,b:2,c:3',',',':') from src limit 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select str_to_map('a:1,b:2,c:3',',',':') from src limit 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 3 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data 
size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: str_to_map('a:1,b:2,c:3',',',':') (type: map) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 377000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 2262 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: select str_to_map('a:1,b:2,c:3',',',':') from src limit 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select str_to_map('a:1,b:2,c:3',',',':') from src limit 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +{"a":"1","b":"2","c":"3"} +{"a":"1","b":"2","c":"3"} +{"a":"1","b":"2","c":"3"} +PREHOOK: query: explain select str_to_map(t.ss,',',':')['a'] +from (select transform('a:1,b:2,c:3') using 'cat' as (ss) from src) t +limit 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select str_to_map(t.ss,',',':')['a'] +from (select transform('a:1,b:2,c:3') using 'cat' as (ss) from src) t +limit 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'a:1,b:2,c:3' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Transform Operator + command: cat + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: str_to_map(_col0,',',':')['a'] (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 3 + Processor Tree: + ListSink + +PREHOOK: query: select str_to_map(t.ss,',',':')['a'] +from (select transform('a:1,b:2,c:3') using 'cat' as (ss) from src) t +limit 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select str_to_map(t.ss,',',':')['a'] +from (select transform('a:1,b:2,c:3') using 'cat' as (ss) from src) t +limit 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1 +1 +1 +PREHOOK: query: drop table tbl_s2m +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table tbl_s2m +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table tbl_s2m as select 'ABC=CC_333=444' as t from src tablesample (3 rows) +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl_s2m +POSTHOOK: query: create table tbl_s2m as select 'ABC=CC_333=444' as t from src tablesample (3 rows) +POSTHOOK: 
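(Worked through by hand, mirroring the tbl_s2m case in this fixture; illustrative only. The pair delimiter '_' splits 'ABC=CC_333=444' into 'ABC=CC' and '333=444', and '=' then splits each pair into key and value.)

select str_to_map('ABC=CC_333=444', '_', '=');         -- {"ABC":"CC","333":"444"}
select str_to_map('ABC=CC_333=444', '_', '=')['333'];  -- '444', matching the three result rows above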
type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl_s2m +POSTHOOK: Lineage: tbl_s2m.t SIMPLE [] +PREHOOK: query: select str_to_map(t,'_','=')['333'] from tbl_s2m +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl_s2m +#### A masked pattern was here #### +POSTHOOK: query: select str_to_map(t,'_','=')['333'] from tbl_s2m +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl_s2m +#### A masked pattern was here #### +444 +444 +444 +PREHOOK: query: drop table tbl_s2m +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tbl_s2m +PREHOOK: Output: default@tbl_s2m +POSTHOOK: query: drop table tbl_s2m +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tbl_s2m +POSTHOOK: Output: default@tbl_s2m diff --git ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.7.out ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.7.out deleted file mode 100644 index 279843b4dd835e75e215387d8894429448b588a2..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.7.out +++ /dev/null @@ -1,999 +0,0 @@ -PREHOOK: query: -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - -CREATE TABLE src_4( - key STRING, - value STRING -) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@src_4 -POSTHOOK: query: -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - -CREATE TABLE src_4( - key STRING, - value STRING -) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@src_4 -RUN: Stage-0:DDL -PREHOOK: query: CREATE TABLE src_5( - key STRING, - value STRING -) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@src_5 -POSTHOOK: query: CREATE TABLE src_5( - key STRING, - value STRING -) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@src_5 -RUN: Stage-0:DDL -Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product -PREHOOK: query: explain -from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -PREHOOK: type: QUERY -POSTHOOK: query: explain -from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-2 depends on stages: Stage-10 - Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 - Stage-6 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-10 - Map Reduce - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '2') and key is null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - 
outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: 0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '2') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE 
Column stats: NONE - Filter Operator - predicate: _col5 is null (type: boolean) - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 - - Stage: Stage-5 - Stats-Aggr Operator - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: string), value (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - - Stage: Stage-7 - Stats-Aggr Operator - -Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product -PREHOOK: query: from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@src_4 -PREHOOK: Output: default@src_5 -POSTHOOK: query: from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@src_4 -POSTHOOK: Output: default@src_5 -POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-10:MAPRED -RUN: Stage-2:MAPRED -RUN: Stage-3:MAPRED -RUN: Stage-6:MAPRED -RUN: Stage-4:MAPRED -RUN: Stage-0:MOVE -RUN: Stage-1:MOVE -RUN: Stage-7:STATS -RUN: Stage-5:STATS -PREHOOK: query: select * from src_4 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_4 -#### A masked pattern was here #### -POSTHOOK: query: select * from src_4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_4 -#### A masked pattern was here #### -90 val_90 -90 val_90 -90 val_90 -92 val_92 -95 val_95 -95 val_95 -96 val_96 -97 val_97 -97 val_97 -98 val_98 -98 val_98 -PREHOOK: query: select * from src_5 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_5 -#### A masked pattern was here #### -POSTHOOK: query: select * from src_5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_5 -#### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -10 val_10 -100 val_100 -100 val_100 -103 val_103 -103 val_103 -104 val_104 -104 val_104 -105 val_105 -11 val_11 -111 val_111 -113 val_113 -113 val_113 -114 val_114 -116 val_116 -118 val_118 -118 val_118 -119 val_119 -119 val_119 -119 val_119 -12 val_12 -12 val_12 -120 val_120 -120 val_120 -125 val_125 -125 val_125 -126 val_126 -128 val_128 -128 val_128 -128 val_128 -129 val_129 -129 val_129 -131 val_131 -133 val_133 -134 val_134 -134 val_134 -136 val_136 -137 val_137 -137 val_137 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -143 val_143 -145 val_145 -146 val_146 -146 val_146 -149 val_149 -149 val_149 -15 val_15 -15 val_15 -150 val_150 -152 val_152 -152 val_152 -153 val_153 -155 val_155 -156 val_156 -157 val_157 -158 val_158 -160 val_160 -162 val_162 -163 val_163 -164 val_164 -164 val_164 -165 val_165 -165 val_165 -166 val_166 -167 val_167 
-167 val_167 -167 val_167 -168 val_168 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -17 val_17 -170 val_170 -172 val_172 -172 val_172 -174 val_174 -174 val_174 -175 val_175 -175 val_175 -176 val_176 -176 val_176 -177 val_177 -178 val_178 -179 val_179 -179 val_179 -18 val_18 -18 val_18 -180 val_180 -181 val_181 -183 val_183 -186 val_186 -187 val_187 -187 val_187 -187 val_187 -189 val_189 -19 val_19 -190 val_190 -191 val_191 -191 val_191 -192 val_192 -193 val_193 -193 val_193 -193 val_193 -194 val_194 -195 val_195 -195 val_195 -196 val_196 -197 val_197 -197 val_197 -199 val_199 -199 val_199 -199 val_199 -2 val_2 -Warning: Map Join MAPJOIN[55][bigTable=b] in task 'Stage-13:MAPRED' is a cross product -Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product -PREHOOK: query: explain -from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -PREHOOK: type: QUERY -POSTHOOK: query: explain -from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-14 depends on stages: Stage-10 , consists of Stage-17, Stage-2 - Stage-17 has a backup stage: Stage-2 - Stage-13 depends on stages: Stage-17 - Stage-15 depends on stages: Stage-2, Stage-13 - Stage-12 depends on stages: Stage-15 - Stage-0 depends on stages: Stage-12 - Stage-7 depends on stages: Stage-0 - Stage-16 depends on stages: Stage-2, Stage-13 - Stage-4 depends on stages: Stage-16 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 - Stage-2 - -STAGE PLANS: - Stage: Stage-10 - Map Reduce - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '2') and key is null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: 0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - 
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-14 - Conditional Operator - - Stage: Stage-17 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 - 1 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-15 - Map Reduce Local Work - Alias -> Map Local Tables: - sq_1:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - sq_1:a - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string), value (type: string) - 1 _col0 (type: string), _col1 (type: string) - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: string), value (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - - Stage: Stage-7 - Stats-Aggr Operator - - Stage: Stage-16 - Map Reduce Local Work - Alias -> Map Local Tables: - sq_2:s1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - sq_2:s1 - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > 
'2') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col5 is null (type: boolean) - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 - - Stage: Stage-5 - Stats-Aggr Operator - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - -Warning: Map Join MAPJOIN[55][bigTable=b] in task 'Stage-13:MAPRED' is a cross product -Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 
'Stage-2:MAPRED' is a cross product -PREHOOK: query: from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@src_4 -PREHOOK: Output: default@src_5 -POSTHOOK: query: from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@src_4 -POSTHOOK: Output: default@src_5 -POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-10:MAPRED -RUN: Stage-14:CONDITIONAL -RUN: Stage-17:MAPREDLOCAL -RUN: Stage-13:MAPRED -RUN: Stage-15:MAPREDLOCAL -RUN: Stage-16:MAPREDLOCAL -RUN: Stage-12:MAPRED -RUN: Stage-4:MAPRED -RUN: Stage-0:MOVE -RUN: Stage-1:MOVE -RUN: Stage-7:STATS -RUN: Stage-5:STATS -PREHOOK: query: select * from src_4 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_4 -#### A masked pattern was here #### -POSTHOOK: query: select * from src_4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_4 -#### A masked pattern was here #### -90 val_90 -90 val_90 -90 val_90 -92 val_92 -95 val_95 -95 val_95 -96 val_96 -97 val_97 -97 val_97 -98 val_98 -98 val_98 -PREHOOK: query: select * from src_5 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_5 -#### A masked pattern was here #### -POSTHOOK: query: select * from src_5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_5 -#### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -10 val_10 -100 val_100 -100 val_100 -103 val_103 -103 val_103 -104 val_104 -104 val_104 -105 val_105 -11 val_11 -111 val_111 -113 val_113 -113 val_113 -114 val_114 -116 val_116 -118 val_118 -118 val_118 -119 val_119 -119 val_119 -119 val_119 -12 val_12 -12 val_12 -120 val_120 -120 val_120 -125 val_125 -125 val_125 -126 val_126 -128 val_128 -128 val_128 -128 val_128 -129 val_129 -129 val_129 -131 val_131 -133 val_133 -134 val_134 -134 val_134 -136 val_136 -137 val_137 -137 val_137 -138 val_138 -138 val_138 -138 val_138 -138 val_138 -143 val_143 -145 val_145 -146 val_146 -146 val_146 -149 val_149 -149 val_149 -15 val_15 -15 val_15 -150 val_150 -152 val_152 -152 val_152 -153 val_153 -155 val_155 -156 val_156 -157 val_157 -158 val_158 -160 val_160 -162 val_162 -163 val_163 -164 val_164 -164 val_164 -165 val_165 -165 val_165 -166 val_166 -167 val_167 -167 val_167 -167 val_167 -168 val_168 -169 val_169 -169 val_169 -169 val_169 -169 val_169 -17 val_17 -170 val_170 -172 val_172 -172 val_172 -174 val_174 -174 val_174 -175 val_175 -175 val_175 -176 val_176 -176 val_176 -177 val_177 -178 val_178 -179 val_179 -179 val_179 -18 val_18 -18 val_18 -180 val_180 -181 val_181 -183 val_183 -186 val_186 -187 val_187 -187 val_187 -187 val_187 -189 val_189 -19 val_19 -190 val_190 -191 val_191 -191 val_191 -192 val_192 -193 val_193 -193 val_193 -193 val_193 -194 val_194 
-195 val_195 -195 val_195 -196 val_196 -197 val_197 -197 val_197 -199 val_199 -199 val_199 -199 val_199 -2 val_2 diff --git ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.8.out ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.8.out deleted file mode 100644 index 899723f273a0eb7a6e47b12b7a25cb37e396def8..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/subquery_multiinsert.q.java1.8.out +++ /dev/null @@ -1,999 +0,0 @@ -PREHOOK: query: -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - -CREATE TABLE src_4( - key STRING, - value STRING -) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@src_4 -POSTHOOK: query: -- SORT_QUERY_RESULTS --- JAVA_VERSION_SPECIFIC_OUTPUT - -CREATE TABLE src_4( - key STRING, - value STRING -) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@src_4 -RUN: Stage-0:DDL -PREHOOK: query: CREATE TABLE src_5( - key STRING, - value STRING -) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@src_5 -POSTHOOK: query: CREATE TABLE src_5( - key STRING, - value STRING -) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@src_5 -RUN: Stage-0:DDL -Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product -PREHOOK: query: explain -from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -PREHOOK: type: QUERY -POSTHOOK: query: explain -from src b -INSERT OVERWRITE TABLE src_4 - select * - where b.key in - (select a.key - from src a - where b.value = a.value and a.key > '9' - ) -INSERT OVERWRITE TABLE src_5 - select * - where b.key not in ( select key from src s1 where s1.key > '2') - order by key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-2 depends on stages: Stage-10 - Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 - Stage-6 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-10 - Map Reduce - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '2') and key is null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 
Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: 0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '2') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col5 is null (type: boolean) - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - 
TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 - - Stage: Stage-1 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 - - Stage: Stage-5 - Stats-Aggr Operator - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: string), value (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - - Stage: Stage-7 - Stats-Aggr Operator - -Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product -PREHOOK: query: from src b -INSERT OVERWRITE TABLE src_4 - select 
- where b.key in
- (select a.key
- from src a
- where b.value = a.value and a.key > '9'
- )
-INSERT OVERWRITE TABLE src_5
- select *
- where b.key not in ( select key from src s1 where s1.key > '2')
- order by key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@src_4
-PREHOOK: Output: default@src_5
-POSTHOOK: query: from src b
-INSERT OVERWRITE TABLE src_4
- select *
- where b.key in
- (select a.key
- from src a
- where b.value = a.value and a.key > '9'
- )
-INSERT OVERWRITE TABLE src_5
- select *
- where b.key not in ( select key from src s1 where s1.key > '2')
- order by key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@src_4
-POSTHOOK: Output: default@src_5
-POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
-RUN: Stage-10:MAPRED
-RUN: Stage-2:MAPRED
-RUN: Stage-3:MAPRED
-RUN: Stage-6:MAPRED
-RUN: Stage-4:MAPRED
-RUN: Stage-0:MOVE
-RUN: Stage-1:MOVE
-RUN: Stage-7:STATS
-RUN: Stage-5:STATS
-PREHOOK: query: select * from src_4
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_4
-#### A masked pattern was here ####
-POSTHOOK: query: select * from src_4
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_4
-#### A masked pattern was here ####
-90 val_90
-90 val_90
-90 val_90
-92 val_92
-95 val_95
-95 val_95
-96 val_96
-97 val_97
-97 val_97
-98 val_98
-98 val_98
-PREHOOK: query: select * from src_5
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_5
-#### A masked pattern was here ####
-POSTHOOK: query: select * from src_5
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_5
-#### A masked pattern was here ####
-0 val_0
-0 val_0
-0 val_0
-10 val_10
-100 val_100
-100 val_100
-103 val_103
-103 val_103
-104 val_104
-104 val_104
-105 val_105
-11 val_11
-111 val_111
-113 val_113
-113 val_113
-114 val_114
-116 val_116
-118 val_118
-118 val_118
-119 val_119
-119 val_119
-119 val_119
-12 val_12
-12 val_12
-120 val_120
-120 val_120
-125 val_125
-125 val_125
-126 val_126
-128 val_128
-128 val_128
-128 val_128
-129 val_129
-129 val_129
-131 val_131
-133 val_133
-134 val_134
-134 val_134
-136 val_136
-137 val_137
-137 val_137
-138 val_138
-138 val_138
-138 val_138
-138 val_138
-143 val_143
-145 val_145
-146 val_146
-146 val_146
-149 val_149
-149 val_149
-15 val_15
-15 val_15
-150 val_150
-152 val_152
-152 val_152
-153 val_153
-155 val_155
-156 val_156
-157 val_157
-158 val_158
-160 val_160
-162 val_162
-163 val_163
-164 val_164
-164 val_164
-165 val_165
-165 val_165
-166 val_166
-167 val_167
-167 val_167
-167 val_167
-168 val_168
-169 val_169
-169 val_169
-169 val_169
-169 val_169
-17 val_17
-170 val_170
-172 val_172
-172 val_172
-174 val_174
-174 val_174
-175 val_175
-175 val_175
-176 val_176
-176 val_176
-177 val_177
-178 val_178
-179 val_179
-179 val_179
-18 val_18
-18 val_18
-180 val_180
-181 val_181
-183 val_183
-186 val_186
-187 val_187
-187 val_187
-187 val_187
-189 val_189
-19 val_19
-190 val_190
-191 val_191
-191 val_191
-192 val_192
-193 val_193
-193 val_193
-193 val_193
-194 val_194
-195 val_195
-195 val_195
-196 val_196
-197 val_197
-197 val_197
-199 val_199
-199 val_199
-199 val_199
-2 val_2
-Warning: Map Join MAPJOIN[55][bigTable=b] in task 'Stage-13:MAPRED' is a cross product
-Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product
-PREHOOK: query: explain
-from src b
-INSERT OVERWRITE TABLE src_4
- select *
- where b.key in
- (select a.key
- from src a
- where b.value = a.value and a.key > '9'
- )
-INSERT OVERWRITE TABLE src_5
- select *
- where b.key not in ( select key from src s1 where s1.key > '2')
- order by key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain
-from src b
-INSERT OVERWRITE TABLE src_4
- select *
- where b.key in
- (select a.key
- from src a
- where b.value = a.value and a.key > '9'
- )
-INSERT OVERWRITE TABLE src_5
- select *
- where b.key not in ( select key from src s1 where s1.key > '2')
- order by key
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-10 is a root stage
- Stage-14 depends on stages: Stage-10 , consists of Stage-17, Stage-2
- Stage-17 has a backup stage: Stage-2
- Stage-13 depends on stages: Stage-17
- Stage-15 depends on stages: Stage-2, Stage-13
- Stage-4 depends on stages: Stage-15
- Stage-1 depends on stages: Stage-4
- Stage-5 depends on stages: Stage-1
- Stage-16 depends on stages: Stage-2, Stage-13
- Stage-12 depends on stages: Stage-16
- Stage-0 depends on stages: Stage-12
- Stage-7 depends on stages: Stage-0
- Stage-2
-
-STAGE PLANS:
- Stage: Stage-10
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: s1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((key > '2') and key is null) (type: boolean)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: 0 (type: bigint)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-14
- Conditional Operator
-
- Stage: Stage-17
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $INTNAME
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $INTNAME
- TableScan
- HashTable Sink Operator
- keys:
- 0
- 1
-
- Stage: Stage-13
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-15
- Map Reduce Local Work
- Alias -> Map Local Tables:
- sq_2:s1
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- sq_2:s1
- TableScan
- alias: s1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key > '2') (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
-
- Stage: Stage-4
- Map Reduce
- Map Operator Tree:
- TableScan
- Map Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col5
- Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col5 is null (type: boolean)
- Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- Local Work:
- Map Reduce Local Work
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src_5
-
- Stage: Stage-1
- Move Operator
- tables:
- replace: true
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src_5
-
- Stage: Stage-5
- Stats-Aggr Operator
-
- Stage: Stage-16
- Map Reduce Local Work
- Alias -> Map Local Tables:
- sq_1:a
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- sq_1:a
- TableScan
- alias: a
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((key > '9') and value is not null) (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string), _col1 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 key (type: string), value (type: string)
- 1 _col0 (type: string), _col1 (type: string)
-
- Stage: Stage-12
- Map Reduce
- Map Operator Tree:
- TableScan
- Map Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 key (type: string), value (type: string)
- 1 _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src_4
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-0
- Move Operator
- tables:
- replace: true
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src_4
-
- Stage: Stage-7
- Stats-Aggr Operator
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: key (type: string), value (type: string)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- TableScan
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Operator Tree:
- Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-Warning: Map Join MAPJOIN[55][bigTable=b] in task 'Stage-13:MAPRED' is a cross product
-Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product
-PREHOOK: query: from src b
-INSERT OVERWRITE TABLE src_4
- select *
- where b.key in
- (select a.key
- from src a
- where b.value = a.value and a.key > '9'
- )
-INSERT OVERWRITE TABLE src_5
- select *
- where b.key not in ( select key from src s1 where s1.key > '2')
- order by key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@src_4
-POSTHOOK: Output: default@src_5
-POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
-RUN: Stage-10:MAPRED
-RUN: Stage-14:CONDITIONAL
-RUN: Stage-17:MAPREDLOCAL
-RUN: Stage-13:MAPRED
-RUN: Stage-15:MAPREDLOCAL
-RUN: Stage-16:MAPREDLOCAL
-RUN: Stage-4:MAPRED
-RUN: Stage-12:MAPRED
-RUN: Stage-1:MOVE
-RUN: Stage-0:MOVE
-RUN: Stage-5:STATS
-RUN: Stage-7:STATS
-PREHOOK: query: select * from src_4
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_4
-#### A masked pattern was here ####
-POSTHOOK: query: select * from src_4
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_4
-#### A masked pattern was here ####
-90 val_90
-90 val_90
-90 val_90
-92 val_92
-95 val_95
-95 val_95
-96 val_96
-97 val_97
-97 val_97
-98 val_98
-98 val_98
-PREHOOK: query: select * from src_5
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src_5
-#### A masked pattern was here ####
-POSTHOOK: query: select * from src_5
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src_5
-#### A masked pattern was here ####
-0 val_0
-0 val_0
-0 val_0
-10 val_10
-100 val_100
-100 val_100
-103 val_103
-103 val_103
-104 val_104
-104 val_104
-105 val_105
-11 val_11
-111 val_111
-113 val_113
-113 val_113
-114 val_114
-116 val_116
-118 val_118
-118 val_118
-119 val_119
-119 val_119
-119 val_119
-12 val_12
-12 val_12
-120 val_120
-120 val_120
-125 val_125
-125 val_125
-126 val_126
-128 val_128
-128 val_128
-128 val_128
-129 val_129
-129 val_129
-131 val_131
-133 val_133
-134 val_134
-134 val_134
-136 val_136
-137 val_137
-137 val_137
-138 val_138
-138 val_138
-138 val_138
-138 val_138
-143 val_143
-145 val_145
-146 val_146
-146 val_146
-149 val_149
-149 val_149
-15 val_15
-15 val_15
-150 val_150
-152 val_152
-152 val_152
-153 val_153
-155 val_155
-156 val_156
-157 val_157
-158 val_158
-160 val_160
-162 val_162
-163 val_163
-164 val_164
-164 val_164
-165 val_165
-165 val_165
-166 val_166
-167 val_167
-167 val_167
-167 val_167
-168 val_168
-169 val_169
-169 val_169
-169 val_169
-169 val_169
-17 val_17
-170 val_170
-172 val_172
-172 val_172
-174 val_174
-174 val_174
-175 val_175
-175 val_175
-176 val_176
-176 val_176
-177 val_177
-178 val_178
-179 val_179
-179 val_179
-18 val_18
-18 val_18
-180 val_180
-181 val_181
-183 val_183
-186 val_186
-187 val_187
-187 val_187
-187 val_187
-189 val_189
-19 val_19
-190 val_190
-191 val_191
-191 val_191
-192 val_192
-193 val_193
-193 val_193
-193 val_193
-194 val_194
-195 val_195
-195 val_195
-196 val_196
-197 val_197
-197 val_197
-199 val_199
-199 val_199
-199 val_199
-2 val_2
diff --git ql/src/test/results/clientpositive/subquery_multiinsert.q.out ql/src/test/results/clientpositive/subquery_multiinsert.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..ff3abc4a42b9e094c92ceffdfb9d3a2840682876
--- /dev/null
+++ ql/src/test/results/clientpositive/subquery_multiinsert.q.out
@@ -0,0 +1,997 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE src_4(
+ key STRING,
+ value STRING
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_4
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE src_4(
+ key STRING,
+ value STRING
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_4
+RUN: Stage-0:DDL
+PREHOOK: query: CREATE TABLE src_5(
+ key STRING,
+ value STRING
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_5
+POSTHOOK: query: CREATE TABLE src_5(
+ key STRING,
+ value STRING
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_5
+RUN: Stage-0:DDL
+Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: explain
+from src b
+INSERT OVERWRITE TABLE src_4
+ select *
+ where b.key in
+ (select a.key
+ from src a
+ where b.value = a.value and a.key > '9'
+ )
+INSERT OVERWRITE TABLE src_5
+ select *
+ where b.key not in ( select key from src s1 where s1.key > '2')
+ order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+from src b
+INSERT OVERWRITE TABLE src_4
+ select *
+ where b.key in
+ (select a.key
+ from src a
+ where b.value = a.value and a.key > '9'
+ )
+INSERT OVERWRITE TABLE src_5
+ select *
+ where b.key not in ( select key from src s1 where s1.key > '2')
+ order by key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-10 is a root stage
+ Stage-2 depends on stages: Stage-10
+ Stage-3 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-4
+ Stage-5 depends on stages: Stage-1
+ Stage-6 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-6
+ Stage-7 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-10
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((key > '2') and key is null) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 = 0) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: 0 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: string), value (type: string)
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: s1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key > '2') (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col5
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col5 is null (type: boolean)
+ Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src_5
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src_5
+
+ Stage: Stage-5
+ Stats-Aggr Operator
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: key (type: string), value (type: string)
+ sort order: ++
+ Map-reduce partition columns: key (type: string), value (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((key > '9') and value is not null) (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 key (type: string), value (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src_4
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src_4
+
+ Stage: Stage-7
+ Stats-Aggr Operator
+
+Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: from src b
+INSERT OVERWRITE TABLE src_4
+ select *
+ where b.key in
+ (select a.key
+ from src a
+ where b.value = a.value and a.key > '9'
+ )
+INSERT OVERWRITE TABLE src_5
+ select *
+ where b.key not in ( select key from src s1 where s1.key > '2')
+ order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@src_4
+PREHOOK: Output: default@src_5
+POSTHOOK: query: from src b
+INSERT OVERWRITE TABLE src_4
+ select *
+ where b.key in
+ (select a.key
+ from src a
+ where b.value = a.value and a.key > '9'
+ )
+INSERT OVERWRITE TABLE src_5
+ select *
+ where b.key not in ( select key from src s1 where s1.key > '2')
+ order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@src_4
+POSTHOOK: Output: default@src_5
+POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+RUN: Stage-10:MAPRED
+RUN: Stage-2:MAPRED
+RUN: Stage-3:MAPRED
+RUN: Stage-6:MAPRED
+RUN: Stage-4:MAPRED
+RUN: Stage-0:MOVE
+RUN: Stage-1:MOVE
+RUN: Stage-7:STATS
+RUN: Stage-5:STATS
+PREHOOK: query: select * from src_4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_4
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src_4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_4
+#### A masked pattern was here ####
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
+PREHOOK: query: select * from src_5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_5
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src_5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_5
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+10 val_10
+100 val_100
+100 val_100
+103 val_103
+103 val_103
+104 val_104
+104 val_104
+105 val_105
+11 val_11
+111 val_111
+113 val_113
+113 val_113
+114 val_114
+116 val_116
+118 val_118
+118 val_118
+119 val_119
+119 val_119
+119 val_119
+12 val_12
+12 val_12
+120 val_120
+120 val_120
+125 val_125
+125 val_125
+126 val_126
+128 val_128
+128 val_128
+128 val_128
+129 val_129
+129 val_129
+131 val_131
+133 val_133
+134 val_134
+134 val_134
+136 val_136
+137 val_137
+137 val_137
+138 val_138
+138 val_138
+138 val_138
+138 val_138
+143 val_143
+145 val_145
+146 val_146
+146 val_146
+149 val_149
+149 val_149
+15 val_15
+15 val_15
+150 val_150
+152 val_152
+152 val_152
+153 val_153
+155 val_155
+156 val_156
+157 val_157
+158 val_158
+160 val_160
+162 val_162
+163 val_163
+164 val_164
+164 val_164
+165 val_165
+165 val_165
+166 val_166
+167 val_167
+167 val_167
+167 val_167
+168 val_168
+169 val_169
+169 val_169
+169 val_169
+169 val_169
+17 val_17
+170 val_170
+172 val_172
+172 val_172
+174 val_174
+174 val_174
+175 val_175
+175 val_175
+176 val_176
+176 val_176
+177 val_177
+178 val_178
+179 val_179
+179 val_179
+18 val_18
+18 val_18
+180 val_180
+181 val_181
+183 val_183
+186 val_186
+187 val_187
+187 val_187
+187 val_187
+189 val_189
+19 val_19
+190 val_190
+191 val_191
+191 val_191
+192 val_192
+193 val_193
+193 val_193
+193 val_193
+194 val_194
+195 val_195
+195 val_195
+196 val_196
+197 val_197
+197 val_197
+199 val_199
+199 val_199
+199 val_199
+2 val_2
+Warning: Map Join MAPJOIN[55][bigTable=b] in task 'Stage-13:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: explain
+from src b
+INSERT OVERWRITE TABLE src_4
+ select *
+ where b.key in
+ (select a.key
+ from src a
+ where b.value = a.value and a.key > '9'
+ )
+INSERT OVERWRITE TABLE src_5
+ select *
+ where b.key not in ( select key from src s1 where s1.key > '2')
+ order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+from src b
+INSERT OVERWRITE TABLE src_4
+ select *
+ where b.key in
+ (select a.key
+ from src a
+ where b.value = a.value and a.key > '9'
+ )
+INSERT OVERWRITE TABLE src_5
+ select *
+ where b.key not in ( select key from src s1 where s1.key > '2')
+ order by key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-10 is a root stage
+ Stage-14 depends on stages: Stage-10 , consists of Stage-17, Stage-2
+ Stage-17 has a backup stage: Stage-2
+ Stage-13 depends on stages: Stage-17
+ Stage-15 depends on stages: Stage-2, Stage-13
+ Stage-4 depends on stages: Stage-15
+ Stage-1 depends on stages: Stage-4
+ Stage-5 depends on stages: Stage-1
+ Stage-16 depends on stages: Stage-2, Stage-13
+ Stage-12 depends on stages: Stage-16
+ Stage-0 depends on stages: Stage-12
+ Stage-7 depends on stages: Stage-0
+ Stage-2
+
+STAGE PLANS:
+ Stage: Stage-10
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((key > '2') and key is null) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 = 0) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: 0 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-14
+ Conditional Operator
+
+ Stage: Stage-17
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0
+ 1
+
+ Stage: Stage-13
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-15
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ sq_2:s1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ sq_2:s1
+ TableScan
+ alias: s1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key > '2') (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col5
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col5 is null (type: boolean)
+ Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src_5
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src_5
+
+ Stage: Stage-5
+ Stats-Aggr Operator
+
+ Stage: Stage-16
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ sq_1:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ sq_1:a
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((key > '9') and value is not null) (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 key (type: string), value (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+
+ Stage: Stage-12
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 key (type: string), value (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src_4
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src_4
+
+ Stage: Stage-7
+ Stats-Aggr Operator
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: string), value (type: string)
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+Warning: Map Join MAPJOIN[55][bigTable=b] in task 'Stage-13:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: from src b
+INSERT OVERWRITE TABLE src_4
+ select *
+ where b.key in
+ (select a.key
+ from src a
+ where b.value = a.value and a.key > '9'
+ )
+INSERT OVERWRITE TABLE src_5
+ select *
+ where b.key not in ( select key from src s1 where s1.key > '2')
+ order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@src_4
+PREHOOK: Output: default@src_5
+POSTHOOK: query: from src b
+INSERT OVERWRITE TABLE src_4
+ select *
+ where b.key in
+ (select a.key
+ from src a
+ where b.value = a.value and a.key > '9'
+ )
+INSERT OVERWRITE TABLE src_5
+ select *
+ where b.key not in ( select key from src s1 where s1.key > '2')
+ order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@src_4
+POSTHOOK: Output: default@src_5
+POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
+RUN: Stage-10:MAPRED
+RUN: Stage-14:CONDITIONAL
+RUN: Stage-17:MAPREDLOCAL
+RUN: Stage-13:MAPRED
+RUN: Stage-15:MAPREDLOCAL
+RUN: Stage-16:MAPREDLOCAL
+RUN: Stage-4:MAPRED
+RUN: Stage-12:MAPRED
+RUN: Stage-1:MOVE
+RUN: Stage-0:MOVE
+RUN: Stage-5:STATS
+RUN: Stage-7:STATS
+PREHOOK: query: select * from src_4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_4
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src_4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_4
+#### A masked pattern was here ####
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
+PREHOOK: query: select * from src_5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_5
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src_5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_5
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+10 val_10
+100 val_100
+100 val_100
+103 val_103
+103 val_103
+104 val_104
+104 val_104
+105 val_105
+11 val_11
+111 val_111
+113 val_113
+113 val_113
+114 val_114
+116 val_116
+118 val_118
+118 val_118
+119 val_119
+119 val_119
+119 val_119
+12 val_12
+12 val_12
+120 val_120
+120 val_120
+125 val_125
+125 val_125
+126 val_126
+128 val_128
+128 val_128
+128 val_128
+129 val_129
+129 val_129
+131 val_131
+133 val_133
+134 val_134
+134 val_134
+136 val_136
+137 val_137
+137 val_137
+138 val_138
+138 val_138
+138 val_138
+138 val_138
+143 val_143
+145 val_145
+146 val_146
+146 val_146
+149 val_149
+149 val_149
+15 val_15
+15 val_15
+150 val_150
+152 val_152
+152 val_152
+153 val_153
+155 val_155
+156 val_156
+157 val_157
+158 val_158
+160 val_160
+162 val_162
+163 val_163
+164 val_164
+164 val_164
+165 val_165
+165 val_165
+166 val_166
+167 val_167
+167 val_167
+167 val_167
+168 val_168
+169 val_169
+169 val_169
+169 val_169
+169 val_169
+17 val_17
+170 val_170
+172 val_172
+172 val_172
+174 val_174
+174 val_174
+175 val_175
+175 val_175
+176 val_176
+176 val_176
+177 val_177
+178 val_178
+179 val_179
+179 val_179
+18 val_18
+18 val_18
+180 val_180
+181 val_181
+183 val_183
+186 val_186
+187 val_187
+187 val_187
+187 val_187
+189 val_189
+19 val_19
+190 val_190
+191 val_191
+191 val_191
+192 val_192
+193 val_193
+193 val_193
+193 val_193
+194 val_194
+195 val_195
+195 val_195
+196 val_196
+197 val_197
+197 val_197
+199 val_199
+199 val_199
+199 val_199
+2 val_2
diff --git ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
deleted file mode 100644
index 70f959150c58c85d65d958025dfbbf4dd80586dd..0000000000000000000000000000000000000000
--- ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
+++ /dev/null
@@ -1,766 +0,0 @@
-Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
-PREHOOK: query: -- non agg, non corr
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-explain
-select key, count(*)
-from src
-group by key
-having key not in
- ( select key from src s1
- where s1.key > '12'
- )
-PREHOOK: type: QUERY
-POSTHOOK: query: -- non agg, non corr
--- JAVA_VERSION_SPECIFIC_OUTPUT
-
-explain
-select key, count(*)
-from src
-group by key
-having key not in
- ( select key from src s1
- where s1.key > '12'
- )
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1, Stage-4
- Stage-3 depends on stages: Stage-2
- Stage-4 is a root stage
- Stage-0 depends on stages: Stage-3
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: key
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: bigint)
- TableScan
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-3
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key > '12') (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Reduce Operator Tree:
- Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col3 is null (type: boolean)
- Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-4
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((key > '12') and key is null) (type: boolean)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-Warning: Shuffle Join JOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
-PREHOOK: query: -- non agg, corr
-explain
-select b.p_mfgr, min(p_retailprice)
-from part b
-group by b.p_mfgr
-having b.p_mfgr not in
- (select p_mfgr
- from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a
- where min(p_retailprice) = l and r - l > 600
- )
-PREHOOK: type: QUERY
-POSTHOOK: query: -- non agg, corr
-explain
-select b.p_mfgr, min(p_retailprice)
-from part b
-group by b.p_mfgr
-having b.p_mfgr not in
- (select p_mfgr
- from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a
- where min(p_retailprice) = l and r - l > 600
- )
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1, Stage-5
- Stage-3 depends on stages: Stage-2, Stage-6
- Stage-4 is a root stage
- Stage-5 depends on stages: Stage-4
- Stage-6 is a root stage
- Stage-0 depends on stages: Stage-3
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: p_mfgr (type: string), p_retailprice (type: double)
- outputColumnNames: p_mfgr, p_retailprice
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(p_retailprice)
- keys: p_mfgr (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double)
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: double)
- TableScan
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-3
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: double)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: double)
- Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: double)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: double)
- Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE
- Reduce Operator Tree:
- Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col0 (type: string), _col1 (type: double)
- 1 _col0 (type: string), _col1 (type: double)
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col3 is null (type: boolean)
- Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: double)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-4
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: p_mfgr (type: string), p_retailprice (type: double)
- outputColumnNames: p_mfgr, p_retailprice
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(p_retailprice), max(p_retailprice)
- keys: p_mfgr (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double), _col2 (type: double)
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (((_col2 - _col1) > 600.0) and (_col0 is null or _col1 is null)) (type: boolean)
- Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-5
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col0 = 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-6
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: p_mfgr (type: string), p_retailprice (type: double)
- outputColumnNames: p_mfgr, p_retailprice
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(p_retailprice), max(p_retailprice)
- keys: p_mfgr (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double), _col2 (type: double)
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
Operator - predicate: ((_col2 - _col1) > 600.0) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join JOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product -PREHOOK: query: select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a - where min(p_retailprice) = l and r - l > 600 - ) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a - where min(p_retailprice) = l and r - l > 600 - ) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -Manufacturer#1 1173.15 -Manufacturer#2 1690.68 -Warning: Shuffle Join JOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product -PREHOOK: query: -- agg, non corr -explain -select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from part a - group by p_mfgr - having max(p_retailprice) - min(p_retailprice) > 600 - ) -PREHOOK: type: QUERY -POSTHOOK: query: -- agg, non corr -explain -select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from part a - group by p_mfgr - having max(p_retailprice) - min(p_retailprice) > 600 - ) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-5 - Stage-3 depends on stages: Stage-2, Stage-6 - Stage-4 is a root stage - Stage-5 depends on stages: Stage-4 - Stage-6 is a root stage - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_retailprice (type: double) - outputColumnNames: p_mfgr, p_retailprice - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(p_retailprice) - keys: p_mfgr (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 
13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: double) - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: null (type: string), p_retailprice (type: double) - outputColumnNames: p_mfgr, p_retailprice - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(p_retailprice), min(p_retailprice) - keys: p_mfgr (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - 
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: double), _col2 (type: double) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 - _col2) > 600.0) (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_retailprice (type: double) - outputColumnNames: p_mfgr, p_retailprice - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(p_retailprice), min(p_retailprice) - keys: p_mfgr (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 - _col2) > 600.0) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Select 
Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join JOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product -PREHOOK: query: select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from part a - group by p_mfgr - having max(p_retailprice) - min(p_retailprice) > 600 - ) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from part a - group by p_mfgr - having max(p_retailprice) - min(p_retailprice) > 600 - ) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -Manufacturer#1 1173.15 -Manufacturer#2 1690.68 diff --git ql/src/test/results/clientpositive/subquery_notin_having.q.java1.8.out ql/src/test/results/clientpositive/subquery_notin_having.q.java1.8.out deleted file mode 100644 index 4e227cd9d02ba6f28d9295aaf03adf15b104a3d8..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/subquery_notin_having.q.java1.8.out +++ /dev/null @@ -1,762 +0,0 @@ -Warning: Shuffle Join JOIN[26][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product -PREHOOK: query: -- non agg, non corr --- JAVA_VERSION_SPECIFIC_OUTPUT - -explain -select key, count(*) -from src -group by key -having key not in - ( select key from src s1 - where s1.key > '12' - ) -PREHOOK: type: QUERY -POSTHOOK: query: -- non agg, non corr --- JAVA_VERSION_SPECIFIC_OUTPUT - -explain -select key, count(*) -from src -group by key -having key not in - ( select key from src s1 - where s1.key > '12' - ) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-3 depends on stages: Stage-2 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat 
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: bigint) - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '12') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '12') and key is null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 
1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join JOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product -PREHOOK: query: -- non agg, corr -explain -select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a - where min(p_retailprice) = l and r - l > 600 - ) -PREHOOK: type: QUERY -POSTHOOK: query: -- non agg, corr -explain -select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a - where min(p_retailprice) = l and r - l > 600 - ) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-5 - Stage-3 depends on stages: Stage-2, Stage-6 - Stage-4 is a root stage - Stage-5 depends on stages: Stage-4 - Stage-6 is a root stage - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_retailprice (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: double) - TableScan - 
Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: double) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: double) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: double) - 1 _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_retailprice (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col1), max(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col2 - _col1) > 600.0) and (_col0 is null or _col1 is null)) (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2 Data 
size: 242 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_retailprice (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col1), max(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col2 - _col1) > 600.0) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join JOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product -PREHOOK: query: select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a 
from part group by p_mfgr) a - where min(p_retailprice) = l and r - l > 600 - ) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a - where min(p_retailprice) = l and r - l > 600 - ) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -Manufacturer#1 1173.15 -Manufacturer#2 1690.68 -Warning: Shuffle Join JOIN[39][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-3:MAPRED' is a cross product -PREHOOK: query: -- agg, non corr -explain -select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from part a - group by p_mfgr - having max(p_retailprice) - min(p_retailprice) > 600 - ) -PREHOOK: type: QUERY -POSTHOOK: query: -- agg, non corr -explain -select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from part a - group by p_mfgr - having max(p_retailprice) - min(p_retailprice) > 600 - ) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-3 depends on stages: Stage-2, Stage-6 - Stage-4 is a root stage - Stage-5 is a root stage - Stage-6 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_retailprice (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: 
string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is null (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: double) - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_retailprice (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(_col1), min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 - _col2) > 600.0) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: null (type: string), p_retailprice 
(type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(_col1), min(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: double), _col2 (type: double) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 - _col2) > 600.0) (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join JOIN[39][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-3:MAPRED' is a cross product -PREHOOK: query: select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from part a - group by p_mfgr - having max(p_retailprice) - min(p_retailprice) > 600 - ) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select b.p_mfgr, min(p_retailprice) -from part b -group by b.p_mfgr -having b.p_mfgr not in - (select p_mfgr - from part a - group by p_mfgr - having max(p_retailprice) - min(p_retailprice) > 600 - ) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -Manufacturer#2 1690.68 
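For this same "agg, non corr" query, the version-specific golden file deleted earlier in this patch records Manufacturer#1 1173.15 before Manufacturer#2 1690.68; this java1.8 file records the identical rows in the opposite order (#2 above, #1 on the next line). Only the iteration order of the unordered GROUP BY result differs between JDKs, which is precisely the instability that the -- JAVA_VERSION_SPECIFIC_OUTPUT marker and the per-version .q.out files were compensating for. An explicit sort would remove the sensitivity altogether; a minimal sketch, where the order by clause is an illustrative addition and not part of the actual test (the HAVING ... NOT IN predicate is elided for brevity):

  select b.p_mfgr, min(p_retailprice)
  from part b
  group by b.p_mfgr
  -- having b.p_mfgr not in (...) elided
  order by b.p_mfgr;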
-Manufacturer#1 1173.15 diff --git ql/src/test/results/clientpositive/subquery_notin_having.q.out ql/src/test/results/clientpositive/subquery_notin_having.q.out new file mode 100644 index 0000000000000000000000000000000000000000..6aeac65d4005ddf6fa719071bced9e6bfeb7d457 --- /dev/null +++ ql/src/test/results/clientpositive/subquery_notin_having.q.out @@ -0,0 +1,764 @@ +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: -- non agg, non corr + +explain +select key, count(*) +from src +group by key +having key not in + ( select key from src s1 + where s1.key > '12' + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, non corr + +explain +select key, count(*) +from src +group by key +having key not in + ( select key from src s1 + where s1.key > '12' + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan 
+ alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > '12') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '12') and key is null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: -- non agg, corr +explain +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +PREHOOK: type: QUERY 
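The STAGE PLANS surrounding this query echo show how Hive realizes the NOT IN predicate without a dedicated anti-join operator: the aggregated outer query is left-outer-joined to the subquery on both the manufacturer and the minimum price (the Stage-3 join keys _col0 (type: string), _col1 (type: double)), and only rows with no match survive the "predicate: _col3 is null" Filter Operator. A hand-written HiveQL equivalent of that join form, ignoring the NULL guard contributed by Stage-4/Stage-5 (the aliases t, s and the column names min_price, l, r are illustrative, not taken from the plan):

  select t.p_mfgr, t.min_price
  from (select p_mfgr, min(p_retailprice) as min_price
        from part
        group by p_mfgr) t
  left outer join
       (select p_mfgr,
               min(p_retailprice) as l,
               max(p_retailprice) as r
        from part
        group by p_mfgr
        having max(p_retailprice) - min(p_retailprice) > 600) s
    on t.p_mfgr = s.p_mfgr and t.min_price = s.l
  where s.p_mfgr is null;

On its own this rewrite is not NULL-safe (if the subquery could emit a NULL key, NOT IN must return an empty result under three-valued logic); that is exactly what the count() = 0 guard branch in the plans enforces.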
+POSTHOOK: query: -- non agg, corr +explain +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-4 is a root stage + Stage-5 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: p_mfgr, p_retailprice + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(p_retailprice) + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: double) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: double) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string), _col1 
(type: double) + 1 _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: p_mfgr, p_retailprice + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(p_retailprice), max(p_retailprice) + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col2 - _col1) > 600.0) and (_col0 is null or _col1 is null)) (type: boolean) + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: p_mfgr, p_retailprice + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(p_retailprice), max(p_retailprice) + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col2 - _col1) > 600.0) (type: boolean) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 1173.15 +Manufacturer#2 1690.68 +Warning: Shuffle Join JOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: -- agg, non corr +explain +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- agg, non corr +explain +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +POSTHOOK: type: QUERY +STAGE 
DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-4 is a root stage + Stage-5 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: p_mfgr, p_retailprice + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(p_retailprice) + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_mfgr is null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: null (type: string), p_retailprice (type: double) + outputColumnNames: p_mfgr, p_retailprice + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(p_retailprice), min(p_retailprice) + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), min(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: double), _col2 (type: double) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 - _col2) > 600.0) (type: boolean) + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map 
Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: p_mfgr, p_retailprice + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(p_retailprice), min(p_retailprice) + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), min(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 - _col2) > 600.0) (type: boolean) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 1173.15 +Manufacturer#2 1690.68 diff --git ql/src/test/results/clientpositive/tez/join0.q.java1.7.out ql/src/test/results/clientpositive/tez/join0.q.java1.7.out deleted file mode 100644 index 59d90871364b963b097117bdc45eacdedd785c0f..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/tez/join0.q.java1.7.out +++ /dev/null @@ -1,239 +0,0 @@ -Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT --- SORT_QUERY_RESULTS - -EXPLAIN -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT --- SORT_QUERY_RESULTS - -EXPLAIN -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM 
src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Map 4 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN FORMATTED -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN FORMATTED -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, 
v2 -POSTHOOK: type: QUERY -#### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 2 val_2 -0 val_0 2 val_2 -0 val_0 2 val_2 -0 val_0 4 val_4 -0 val_0 4 val_4 -0 val_0 4 val_4 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 8 val_8 -0 val_0 8 val_8 -0 val_0 8 val_8 -0 val_0 9 val_9 -0 val_0 9 val_9 -0 val_0 9 val_9 -2 val_2 0 val_0 -2 val_2 0 val_0 -2 val_2 0 val_0 -2 val_2 2 val_2 -2 val_2 4 val_4 -2 val_2 5 val_5 -2 val_2 5 val_5 -2 val_2 5 val_5 -2 val_2 8 val_8 -2 val_2 9 val_9 -4 val_4 0 val_0 -4 val_4 0 val_0 -4 val_4 0 val_0 -4 val_4 2 val_2 -4 val_4 4 val_4 -4 val_4 5 val_5 -4 val_4 5 val_5 -4 val_4 5 val_5 -4 val_4 8 val_8 -4 val_4 9 val_9 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 2 val_2 -5 val_5 2 val_2 -5 val_5 2 val_2 -5 val_5 4 val_4 -5 val_5 4 val_4 -5 val_5 4 val_4 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 8 val_8 -5 val_5 8 val_8 -5 val_5 8 val_8 -5 val_5 9 val_9 -5 val_5 9 val_9 -5 val_5 9 val_9 -8 val_8 0 val_0 -8 val_8 0 val_0 -8 val_8 0 val_0 -8 val_8 2 val_2 -8 val_8 4 val_4 -8 val_8 5 val_5 -8 val_8 5 val_5 -8 val_8 5 val_5 -8 val_8 8 val_8 -8 val_8 9 val_9 -9 val_9 0 val_0 -9 val_9 0 val_0 -9 val_9 0 val_0 -9 val_9 2 val_2 -9 val_9 4 val_4 -9 val_9 5 val_5 -9 val_9 5 val_5 -9 val_9 5 val_5 -9 val_9 8 val_8 -9 val_9 9 val_9 diff --git ql/src/test/results/clientpositive/tez/join0.q.java1.8.out ql/src/test/results/clientpositive/tez/join0.q.java1.8.out deleted file mode 100644 index 10d780200944dd96052c8ef989a211e225dfc4f0..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/tez/join0.q.java1.8.out +++ /dev/null @@ -1,236 +0,0 @@ -Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -EXPLAIN -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -EXPLAIN -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - 
Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Map 4 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN FORMATTED -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN FORMATTED -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -#### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product 
-PREHOOK: query: SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 2 val_2 -0 val_0 2 val_2 -0 val_0 2 val_2 -0 val_0 4 val_4 -0 val_0 4 val_4 -0 val_0 4 val_4 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 8 val_8 -0 val_0 8 val_8 -0 val_0 8 val_8 -0 val_0 9 val_9 -0 val_0 9 val_9 -0 val_0 9 val_9 -2 val_2 0 val_0 -2 val_2 0 val_0 -2 val_2 0 val_0 -2 val_2 2 val_2 -2 val_2 4 val_4 -2 val_2 5 val_5 -2 val_2 5 val_5 -2 val_2 5 val_5 -2 val_2 8 val_8 -2 val_2 9 val_9 -4 val_4 0 val_0 -4 val_4 0 val_0 -4 val_4 0 val_0 -4 val_4 2 val_2 -4 val_4 4 val_4 -4 val_4 5 val_5 -4 val_4 5 val_5 -4 val_4 5 val_5 -4 val_4 8 val_8 -4 val_4 9 val_9 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 2 val_2 -5 val_5 2 val_2 -5 val_5 2 val_2 -5 val_5 4 val_4 -5 val_5 4 val_4 -5 val_5 4 val_4 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 5 val_5 -5 val_5 8 val_8 -5 val_5 8 val_8 -5 val_5 8 val_8 -5 val_5 9 val_9 -5 val_5 9 val_9 -5 val_5 9 val_9 -8 val_8 0 val_0 -8 val_8 0 val_0 -8 val_8 0 val_0 -8 val_8 2 val_2 -8 val_8 4 val_4 -8 val_8 5 val_5 -8 val_8 5 val_5 -8 val_8 5 val_5 -8 val_8 8 val_8 -8 val_8 9 val_9 -9 val_9 0 val_0 -9 val_9 0 val_0 -9 val_9 0 val_0 -9 val_9 2 val_2 -9 val_9 4 val_4 -9 val_9 5 val_5 -9 val_9 5 val_5 -9 val_9 5 val_5 -9 val_9 8 val_8 -9 val_9 9 val_9 diff --git ql/src/test/results/clientpositive/tez/join0.q.out ql/src/test/results/clientpositive/tez/join0.q.out new file mode 100644 index 0000000000000000000000000000000000000000..67d71d5bee44b3015f055ca1437f8a4caeadeb99 --- /dev/null +++ ql/src/test/results/clientpositive/tez/join0.q.out @@ -0,0 +1,237 @@ +Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +PREHOOK: type: QUERY +POSTHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) 
+#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: EXPLAIN FORMATTED +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN FORMATTED +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +POSTHOOK: type: QUERY +#### A masked pattern was here #### +Warning: Shuffle Join MERGEJOIN[15][tables = [src1, src2]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE 
src.key < 10) src2 + SORT BY k1, v1, k2, v2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 2 val_2 +0 val_0 2 val_2 +0 val_0 2 val_2 +0 val_0 4 val_4 +0 val_0 4 val_4 +0 val_0 4 val_4 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 8 val_8 +0 val_0 8 val_8 +0 val_0 8 val_8 +0 val_0 9 val_9 +0 val_0 9 val_9 +0 val_0 9 val_9 +2 val_2 0 val_0 +2 val_2 0 val_0 +2 val_2 0 val_0 +2 val_2 2 val_2 +2 val_2 4 val_4 +2 val_2 5 val_5 +2 val_2 5 val_5 +2 val_2 5 val_5 +2 val_2 8 val_8 +2 val_2 9 val_9 +4 val_4 0 val_0 +4 val_4 0 val_0 +4 val_4 0 val_0 +4 val_4 2 val_2 +4 val_4 4 val_4 +4 val_4 5 val_5 +4 val_4 5 val_5 +4 val_4 5 val_5 +4 val_4 8 val_8 +4 val_4 9 val_9 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 2 val_2 +5 val_5 2 val_2 +5 val_5 2 val_2 +5 val_5 4 val_4 +5 val_5 4 val_4 +5 val_5 4 val_4 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 8 val_8 +5 val_5 8 val_8 +5 val_5 8 val_8 +5 val_5 9 val_9 +5 val_5 9 val_9 +5 val_5 9 val_9 +8 val_8 0 val_0 +8 val_8 0 val_0 +8 val_8 0 val_0 +8 val_8 2 val_2 +8 val_8 4 val_4 +8 val_8 5 val_5 +8 val_8 5 val_5 +8 val_8 5 val_5 +8 val_8 8 val_8 +8 val_8 9 val_9 +9 val_9 0 val_0 +9 val_9 0 val_0 +9 val_9 0 val_0 +9 val_9 2 val_2 +9 val_9 4 val_4 +9 val_9 5 val_5 +9 val_9 5 val_5 +9 val_9 5 val_5 +9 val_9 8 val_8 +9 val_9 9 val_9 diff --git ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.7.out ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.7.out deleted file mode 100644 index 420e7881636338d1a829d4c2609498e6864cc23d..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.7.out +++ /dev/null @@ -1,218 +0,0 @@ -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE over1korc -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE over1korc -POSTHOOK: type: DROPTABLE -PREHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1k -POSTHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: 
Output: default@over1k -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@over1k -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@over1k -PREHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1korc -POSTHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over1korc -PREHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -PREHOOK: type: QUERY -PREHOOK: Input: default@over1k -PREHOOK: Output: default@over1korc -POSTHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1k -POSTHOOK: Output: default@over1korc -POSTHOOK: Lineage: over1korc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ] -POSTHOOK: Lineage: over1korc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ] -POSTHOOK: Lineage: over1korc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ] -POSTHOOK: Lineage: over1korc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] -POSTHOOK: Lineage: over1korc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] -POSTHOOK: Lineage: over1korc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ] -POSTHOOK: Lineage: over1korc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ] -POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ] -POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] -POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] -POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: 
COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(50), avg(50.0), avg(50) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) - Reducer 3 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@over1korc -#### A masked pattern was here #### -POSTHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1korc -#### A masked pattern was here #### -65536 50.0 50.0 50.0000 -65537 50.0 50.0 50.0000 -65538 50.0 50.0 50.0000 -65539 50.0 50.0 50.0000 -65540 50.0 50.0 50.0000 -65541 50.0 50.0 50.0000 -65542 50.0 50.0 50.0000 -65543 50.0 50.0 50.0000 -65544 50.0 50.0 50.0000 -65545 50.0 50.0 50.0000 diff --git ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.8.out ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.8.out deleted file mode 100644 index 331edd0296c8f8d94b2ed88b3d9867eb2feb4995..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/tez/vector_cast_constant.q.java1.8.out +++ /dev/null @@ -1,216 +0,0 @@ -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE 
over1korc -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE over1korc -POSTHOOK: type: DROPTABLE -PREHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1k -POSTHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over1k -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@over1k -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@over1k -PREHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1korc -POSTHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over1korc -PREHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -PREHOOK: type: QUERY -PREHOOK: Input: default@over1k -PREHOOK: Output: default@over1korc -POSTHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1k -POSTHOOK: Output: default@over1korc -POSTHOOK: Lineage: over1korc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ] -POSTHOOK: Lineage: over1korc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ] -POSTHOOK: Lineage: over1korc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ] -POSTHOOK: Lineage: over1korc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] -POSTHOOK: Lineage: over1korc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] -POSTHOOK: Lineage: over1korc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ] -POSTHOOK: Lineage: over1korc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ] -POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ] -POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] -POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] -POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -PREHOOK: type: QUERY -POSTHOOK: 
query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(50), avg(50.0), avg(50) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) - Reducer 3 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@over1korc -#### A masked pattern was here #### -POSTHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1korc -#### A masked pattern was here #### -65536 50.0 50.0 50 -65537 50.0 50.0 50 -65538 50.0 50.0 50 -65539 50.0 50.0 50 -65540 50.0 50.0 50 -65541 50.0 50.0 50 -65542 50.0 
50.0 50 -65543 50.0 50.0 50 -65544 50.0 50.0 50 -65545 50.0 50.0 50 diff --git ql/src/test/results/clientpositive/tez/vector_cast_constant.q.out ql/src/test/results/clientpositive/tez/vector_cast_constant.q.out new file mode 100644 index 0000000000000000000000000000000000000000..46b13c8462e8fcf5704d390a2b6eade4b0dc11ef --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_cast_constant.q.out @@ -0,0 +1,214 @@ +PREHOOK: query: DROP TABLE over1k +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE over1k +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE over1korc +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE over1korc +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE over1k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k +POSTHOOK: query: -- data setup +CREATE TABLE over1k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over1k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over1k +PREHOOK: query: CREATE TABLE over1korc(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1korc +POSTHOOK: query: CREATE TABLE over1korc(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1korc +PREHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k +PREHOOK: Output: default@over1korc +POSTHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k +POSTHOOK: Output: default@over1korc +POSTHOOK: Lineage: over1korc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: over1korc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: over1korc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: over1korc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: over1korc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Lineage: over1korc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: over1korc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ] 
+POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] +PREHOOK: query: EXPLAIN SELECT + i, + AVG(CAST(50 AS INT)) AS `avg_int_ok`, + AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, + AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + i, + AVG(CAST(50 AS INT)) AS `avg_int_ok`, + AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, + AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1korc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(50), avg(50.0), avg(50) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) + Reducer 3 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + i, + AVG(CAST(50 AS INT)) AS `avg_int_ok`, + AVG(CAST(50 AS 
DOUBLE)) AS `avg_double_ok`, + AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1korc +#### A masked pattern was here #### +POSTHOOK: query: SELECT + i, + AVG(CAST(50 AS INT)) AS `avg_int_ok`, + AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, + AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1korc +#### A masked pattern was here #### +65536 50.0 50.0 50.0000 +65537 50.0 50.0 50.0000 +65538 50.0 50.0 50.0000 +65539 50.0 50.0 50.0000 +65540 50.0 50.0 50.0000 +65541 50.0 50.0 50.0000 +65542 50.0 50.0 50.0000 +65543 50.0 50.0 50.0000 +65544 50.0 50.0 50.0000 +65545 50.0 50.0 50.0000 diff --git ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out deleted file mode 100644 index 459d93b99d7c57108945db7d95bd612e33029900..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out +++ /dev/null @@ -1,457 +0,0 @@ -PREHOOK: query: drop table varchar_udf_1 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table varchar_udf_1 -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@varchar_udf_1 -POSTHOOK: query: create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@varchar_udf_1 -PREHOOK: query: insert overwrite table varchar_udf_1 - select key, value, key, value from src where key = '238' limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@varchar_udf_1 -POSTHOOK: query: insert overwrite table varchar_udf_1 - select key, value, key, value from src where key = '238' limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@varchar_udf_1 -POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - --- UDFs with varchar support -select - concat(c1, c2), - concat(c3, c4), - concat(c1, c2) = concat(c3, c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - --- UDFs with varchar support -select - concat(c1, c2), - concat(c3, c4), - concat(c1, c2) = concat(c3, c4) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -238val_238 238val_238 true -PREHOOK: query: select - upper(c2), - upper(c4), - upper(c2) = upper(c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - upper(c2), - upper(c4), - upper(c2) = upper(c4) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### 
-VAL_238 VAL_238 true -PREHOOK: query: select - lower(c2), - lower(c4), - lower(c2) = lower(c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - lower(c2), - lower(c4), - lower(c2) = lower(c4) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: -- Scalar UDFs -select - ascii(c2), - ascii(c4), - ascii(c2) = ascii(c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: -- Scalar UDFs -select - ascii(c2), - ascii(c4), - ascii(c2) = ascii(c4) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -118 118 true -PREHOOK: query: select - concat_ws('|', c1, c2), - concat_ws('|', c3, c4), - concat_ws('|', c1, c2) = concat_ws('|', c3, c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - concat_ws('|', c1, c2), - concat_ws('|', c3, c4), - concat_ws('|', c1, c2) = concat_ws('|', c3, c4) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -238|val_238 238|val_238 true -PREHOOK: query: select - decode(encode(c2, 'US-ASCII'), 'US-ASCII'), - decode(encode(c4, 'US-ASCII'), 'US-ASCII'), - decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - decode(encode(c2, 'US-ASCII'), 'US-ASCII'), - decode(encode(c4, 'US-ASCII'), 'US-ASCII'), - decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: select - instr(c2, '_'), - instr(c4, '_'), - instr(c2, '_') = instr(c4, '_') -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - instr(c2, '_'), - instr(c4, '_'), - instr(c2, '_') = instr(c4, '_') -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -4 4 true -PREHOOK: query: select - length(c2), - length(c4), - length(c2) = length(c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - length(c2), - length(c4), - length(c2) = length(c4) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -7 7 true -PREHOOK: query: select - locate('a', 'abcdabcd', 3), - locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), - locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - locate('a', 'abcdabcd', 3), - locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), - locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as 
varchar(10)), 3) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -5 5 true -PREHOOK: query: select - lpad(c2, 15, ' '), - lpad(c4, 15, ' '), - lpad(c2, 15, ' ') = lpad(c4, 15, ' ') -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - lpad(c2, 15, ' '), - lpad(c4, 15, ' '), - lpad(c2, 15, ' ') = lpad(c4, 15, ' ') -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### - val_238 val_238 true -PREHOOK: query: select - ltrim(c2), - ltrim(c4), - ltrim(c2) = ltrim(c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - ltrim(c2), - ltrim(c4), - ltrim(c2) = ltrim(c4) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: select - c2 regexp 'val', - c4 regexp 'val', - (c2 regexp 'val') = (c4 regexp 'val') -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - c2 regexp 'val', - c4 regexp 'val', - (c2 regexp 'val') = (c4 regexp 'val') -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -true true true -PREHOOK: query: select - regexp_extract(c2, 'val_([0-9]+)', 1), - regexp_extract(c4, 'val_([0-9]+)', 1), - regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - regexp_extract(c2, 'val_([0-9]+)', 1), - regexp_extract(c4, 'val_([0-9]+)', 1), - regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -238 238 true -PREHOOK: query: select - regexp_replace(c2, 'val', 'replaced'), - regexp_replace(c4, 'val', 'replaced'), - regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - regexp_replace(c2, 'val', 'replaced'), - regexp_replace(c4, 'val', 'replaced'), - regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -replaced_238 replaced_238 true -PREHOOK: query: select - reverse(c2), - reverse(c4), - reverse(c2) = reverse(c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - reverse(c2), - reverse(c4), - reverse(c2) = reverse(c4) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -832_lav 832_lav true -PREHOOK: query: select - rpad(c2, 15, ' '), - rpad(c4, 15, ' '), - rpad(c2, 15, ' ') = rpad(c4, 15, ' ') -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - rpad(c2, 
15, ' '), - rpad(c4, 15, ' '), - rpad(c2, 15, ' ') = rpad(c4, 15, ' ') -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: select - rtrim(c2), - rtrim(c4), - rtrim(c2) = rtrim(c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - rtrim(c2), - rtrim(c4), - rtrim(c2) = rtrim(c4) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: select - sentences('See spot run. See jane run.'), - sentences(cast('See spot run. See jane run.' as varchar(50))) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - sentences('See spot run. See jane run.'), - sentences(cast('See spot run. See jane run.' as varchar(50))) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -[["See","spot","run"],["See","jane","run"]] [["See","spot","run"],["See","jane","run"]] -PREHOOK: query: select - split(c2, '_'), - split(c4, '_') -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - split(c2, '_'), - split(c4, '_') -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -["val","238"] ["val","238"] -PREHOOK: query: select - str_to_map('a:1,b:2,c:3',',',':'), - str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - str_to_map('a:1,b:2,c:3',',',':'), - str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -{"b":"2","a":"1","c":"3"} {"b":"2","a":"1","c":"3"} -PREHOOK: query: select - substr(c2, 1, 3), - substr(c4, 1, 3), - substr(c2, 1, 3) = substr(c4, 1, 3) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - substr(c2, 1, 3), - substr(c4, 1, 3), - substr(c2, 1, 3) = substr(c4, 1, 3) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -val val true -PREHOOK: query: select - trim(c2), - trim(c4), - trim(c2) = trim(c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - trim(c2), - trim(c4), - trim(c2) = trim(c4) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: -- Aggregate Functions -select - compute_stats(c2, 16), - compute_stats(c4, 16) -from varchar_udf_1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: -- Aggregate Functions -select - compute_stats(c2, 16), - compute_stats(c4, 16) -from varchar_udf_1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### 
-{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} -PREHOOK: query: select - min(c2), - min(c4) -from varchar_udf_1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - min(c2), - min(c4) -from varchar_udf_1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -val_238 val_238 -PREHOOK: query: select - max(c2), - max(c4) -from varchar_udf_1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - max(c2), - max(c4) -from varchar_udf_1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -val_238 val_238 -PREHOOK: query: drop table varchar_udf_1 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@varchar_udf_1 -PREHOOK: Output: default@varchar_udf_1 -POSTHOOK: query: drop table varchar_udf_1 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@varchar_udf_1 -POSTHOOK: Output: default@varchar_udf_1 diff --git ql/src/test/results/clientpositive/varchar_udf1.q.java1.8.out ql/src/test/results/clientpositive/varchar_udf1.q.java1.8.out deleted file mode 100644 index ace85682bd9fe659e475464330a58d2e0b38ef74..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/varchar_udf1.q.java1.8.out +++ /dev/null @@ -1,457 +0,0 @@ -PREHOOK: query: drop table varchar_udf_1 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table varchar_udf_1 -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@varchar_udf_1 -POSTHOOK: query: create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@varchar_udf_1 -PREHOOK: query: insert overwrite table varchar_udf_1 - select key, value, key, value from src where key = '238' limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@varchar_udf_1 -POSTHOOK: query: insert overwrite table varchar_udf_1 - select key, value, key, value from src where key = '238' limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@varchar_udf_1 -POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - --- UDFs with varchar support -select - concat(c1, c2), - concat(c3, c4), - concat(c1, c2) = concat(c3, c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - --- UDFs with varchar support -select - concat(c1, c2), - concat(c3, c4), - concat(c1, c2) = concat(c3, c4) -from varchar_udf_1 limit 1 
-POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -238val_238 238val_238 true -PREHOOK: query: select - upper(c2), - upper(c4), - upper(c2) = upper(c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - upper(c2), - upper(c4), - upper(c2) = upper(c4) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -VAL_238 VAL_238 true -PREHOOK: query: select - lower(c2), - lower(c4), - lower(c2) = lower(c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - lower(c2), - lower(c4), - lower(c2) = lower(c4) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: -- Scalar UDFs -select - ascii(c2), - ascii(c4), - ascii(c2) = ascii(c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: -- Scalar UDFs -select - ascii(c2), - ascii(c4), - ascii(c2) = ascii(c4) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -118 118 true -PREHOOK: query: select - concat_ws('|', c1, c2), - concat_ws('|', c3, c4), - concat_ws('|', c1, c2) = concat_ws('|', c3, c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - concat_ws('|', c1, c2), - concat_ws('|', c3, c4), - concat_ws('|', c1, c2) = concat_ws('|', c3, c4) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -238|val_238 238|val_238 true -PREHOOK: query: select - decode(encode(c2, 'US-ASCII'), 'US-ASCII'), - decode(encode(c4, 'US-ASCII'), 'US-ASCII'), - decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - decode(encode(c2, 'US-ASCII'), 'US-ASCII'), - decode(encode(c4, 'US-ASCII'), 'US-ASCII'), - decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: select - instr(c2, '_'), - instr(c4, '_'), - instr(c2, '_') = instr(c4, '_') -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - instr(c2, '_'), - instr(c4, '_'), - instr(c2, '_') = instr(c4, '_') -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -4 4 true -PREHOOK: query: select - length(c2), - length(c4), - length(c2) = length(c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - length(c2), - length(c4), - length(c2) = length(c4) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -7 7 true -PREHOOK: query: select - 
locate('a', 'abcdabcd', 3), - locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), - locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - locate('a', 'abcdabcd', 3), - locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), - locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -5 5 true -PREHOOK: query: select - lpad(c2, 15, ' '), - lpad(c4, 15, ' '), - lpad(c2, 15, ' ') = lpad(c4, 15, ' ') -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - lpad(c2, 15, ' '), - lpad(c4, 15, ' '), - lpad(c2, 15, ' ') = lpad(c4, 15, ' ') -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### - val_238 val_238 true -PREHOOK: query: select - ltrim(c2), - ltrim(c4), - ltrim(c2) = ltrim(c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - ltrim(c2), - ltrim(c4), - ltrim(c2) = ltrim(c4) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: select - regexp(c2, 'val'), - regexp(c4, 'val'), - regexp(c2, 'val') = regexp(c4, 'val') -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - regexp(c2, 'val'), - regexp(c4, 'val'), - regexp(c2, 'val') = regexp(c4, 'val') -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -true true true -PREHOOK: query: select - regexp_extract(c2, 'val_([0-9]+)', 1), - regexp_extract(c4, 'val_([0-9]+)', 1), - regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - regexp_extract(c2, 'val_([0-9]+)', 1), - regexp_extract(c4, 'val_([0-9]+)', 1), - regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -238 238 true -PREHOOK: query: select - regexp_replace(c2, 'val', 'replaced'), - regexp_replace(c4, 'val', 'replaced'), - regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - regexp_replace(c2, 'val', 'replaced'), - regexp_replace(c4, 'val', 'replaced'), - regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -replaced_238 replaced_238 true -PREHOOK: query: select - reverse(c2), - reverse(c4), - reverse(c2) = reverse(c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: 
default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - reverse(c2), - reverse(c4), - reverse(c2) = reverse(c4) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -832_lav 832_lav true -PREHOOK: query: select - rpad(c2, 15, ' '), - rpad(c4, 15, ' '), - rpad(c2, 15, ' ') = rpad(c4, 15, ' ') -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - rpad(c2, 15, ' '), - rpad(c4, 15, ' '), - rpad(c2, 15, ' ') = rpad(c4, 15, ' ') -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: select - rtrim(c2), - rtrim(c4), - rtrim(c2) = rtrim(c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - rtrim(c2), - rtrim(c4), - rtrim(c2) = rtrim(c4) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: select - sentences('See spot run. See jane run.'), - sentences(cast('See spot run. See jane run.' as varchar(50))) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - sentences('See spot run. See jane run.'), - sentences(cast('See spot run. See jane run.' as varchar(50))) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -[["See","spot","run"],["See","jane","run"]] [["See","spot","run"],["See","jane","run"]] -PREHOOK: query: select - split(c2, '_'), - split(c4, '_') -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - split(c2, '_'), - split(c4, '_') -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -["val","238"] ["val","238"] -PREHOOK: query: select - str_to_map('a:1,b:2,c:3',',',':'), - str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - str_to_map('a:1,b:2,c:3',',',':'), - str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -{"a":"1","b":"2","c":"3"} {"a":"1","b":"2","c":"3"} -PREHOOK: query: select - substr(c2, 1, 3), - substr(c4, 1, 3), - substr(c2, 1, 3) = substr(c4, 1, 3) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - substr(c2, 1, 3), - substr(c4, 1, 3), - substr(c2, 1, 3) = substr(c4, 1, 3) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -val val true -PREHOOK: query: select - trim(c2), - trim(c4), - trim(c2) = trim(c4) -from varchar_udf_1 limit 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - trim(c2), - trim(c4), - trim(c2) = trim(c4) -from varchar_udf_1 limit 1 -POSTHOOK: type: QUERY 
-POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -val_238 val_238 true -PREHOOK: query: -- Aggregate Functions -select - compute_stats(c2, 16), - compute_stats(c4, 16) -from varchar_udf_1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: -- Aggregate Functions -select - compute_stats(c2, 16), - compute_stats(c4, 16) -from varchar_udf_1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1} -PREHOOK: query: select - min(c2), - min(c4) -from varchar_udf_1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - min(c2), - min(c4) -from varchar_udf_1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -val_238 val_238 -PREHOOK: query: select - max(c2), - max(c4) -from varchar_udf_1 -PREHOOK: type: QUERY -PREHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -POSTHOOK: query: select - max(c2), - max(c4) -from varchar_udf_1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@varchar_udf_1 -#### A masked pattern was here #### -val_238 val_238 -PREHOOK: query: drop table varchar_udf_1 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@varchar_udf_1 -PREHOOK: Output: default@varchar_udf_1 -POSTHOOK: query: drop table varchar_udf_1 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@varchar_udf_1 -POSTHOOK: Output: default@varchar_udf_1 diff --git ql/src/test/results/clientpositive/varchar_udf1.q.out ql/src/test/results/clientpositive/varchar_udf1.q.out new file mode 100644 index 0000000000000000000000000000000000000000..e5cfce523ff26d47ae7ac1c17cbb22a26f98a92c --- /dev/null +++ ql/src/test/results/clientpositive/varchar_udf1.q.out @@ -0,0 +1,453 @@ +PREHOOK: query: drop table varchar_udf_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_udf_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@varchar_udf_1 +POSTHOOK: query: create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@varchar_udf_1 +PREHOOK: query: insert overwrite table varchar_udf_1 + select key, value, key, value from src where key = '238' limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@varchar_udf_1 +POSTHOOK: query: insert overwrite table varchar_udf_1 + select key, value, key, value from src where key = '238' limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@varchar_udf_1 +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- UDFs with varchar support +select + concat(c1, c2), + concat(c3, c4), + concat(c1, c2) = 
concat(c3, c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: -- UDFs with varchar support +select + concat(c1, c2), + concat(c3, c4), + concat(c1, c2) = concat(c3, c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +238val_238 238val_238 true +PREHOOK: query: select + upper(c2), + upper(c4), + upper(c2) = upper(c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + upper(c2), + upper(c4), + upper(c2) = upper(c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +VAL_238 VAL_238 true +PREHOOK: query: select + lower(c2), + lower(c4), + lower(c2) = lower(c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + lower(c2), + lower(c4), + lower(c2) = lower(c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +val_238 val_238 true +PREHOOK: query: -- Scalar UDFs +select + ascii(c2), + ascii(c4), + ascii(c2) = ascii(c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: -- Scalar UDFs +select + ascii(c2), + ascii(c4), + ascii(c2) = ascii(c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +118 118 true +PREHOOK: query: select + concat_ws('|', c1, c2), + concat_ws('|', c3, c4), + concat_ws('|', c1, c2) = concat_ws('|', c3, c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + concat_ws('|', c1, c2), + concat_ws('|', c3, c4), + concat_ws('|', c1, c2) = concat_ws('|', c3, c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +238|val_238 238|val_238 true +PREHOOK: query: select + decode(encode(c2, 'US-ASCII'), 'US-ASCII'), + decode(encode(c4, 'US-ASCII'), 'US-ASCII'), + decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + decode(encode(c2, 'US-ASCII'), 'US-ASCII'), + decode(encode(c4, 'US-ASCII'), 'US-ASCII'), + decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +val_238 val_238 true +PREHOOK: query: select + instr(c2, '_'), + instr(c4, '_'), + instr(c2, '_') = instr(c4, '_') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + instr(c2, '_'), + instr(c4, '_'), + instr(c2, '_') = instr(c4, '_') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +4 4 true +PREHOOK: query: select + length(c2), + length(c4), + length(c2) = length(c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + length(c2), + length(c4), + length(c2) = length(c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +7 7 true +PREHOOK: query: select + locate('a', 'abcdabcd', 3), + locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), + locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + locate('a', 'abcdabcd', 3), + locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), + locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +5 5 true +PREHOOK: query: select + lpad(c2, 15, ' '), + lpad(c4, 15, ' '), + lpad(c2, 15, ' ') = lpad(c4, 15, ' ') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + lpad(c2, 15, ' '), + lpad(c4, 15, ' '), + lpad(c2, 15, ' ') = lpad(c4, 15, ' ') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### + val_238 val_238 true +PREHOOK: query: select + ltrim(c2), + ltrim(c4), + ltrim(c2) = ltrim(c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + ltrim(c2), + ltrim(c4), + ltrim(c2) = ltrim(c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +val_238 val_238 true +PREHOOK: query: select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + c2 regexp 'val', + c4 regexp 'val', + (c2 regexp 'val') = (c4 regexp 'val') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +true true true +PREHOOK: query: select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +238 238 true +PREHOOK: query: select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +replaced_238 replaced_238 true +PREHOOK: query: select + reverse(c2), + reverse(c4), + reverse(c2) = reverse(c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + reverse(c2), + reverse(c4), + reverse(c2) = reverse(c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +832_lav 832_lav true +PREHOOK: query: select + rpad(c2, 15, ' '), + rpad(c4, 15, ' '), + rpad(c2, 15, ' ') = rpad(c4, 15, ' ') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + rpad(c2, 15, ' '), + rpad(c4, 15, ' '), + rpad(c2, 15, ' ') = rpad(c4, 15, ' ') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +val_238 val_238 true +PREHOOK: query: select + rtrim(c2), + rtrim(c4), + rtrim(c2) = rtrim(c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + rtrim(c2), + rtrim(c4), + rtrim(c2) = rtrim(c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +val_238 val_238 true +PREHOOK: query: select + sentences('See spot run. See jane run.'), + sentences(cast('See spot run. See jane run.' as varchar(50))) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + sentences('See spot run. See jane run.'), + sentences(cast('See spot run. See jane run.' 
as varchar(50))) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +[["See","spot","run"],["See","jane","run"]] [["See","spot","run"],["See","jane","run"]] +PREHOOK: query: select + split(c2, '_'), + split(c4, '_') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + split(c2, '_'), + split(c4, '_') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +["val","238"] ["val","238"] +PREHOOK: query: select + str_to_map('a:1,b:2,c:3',',',':'), + str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + str_to_map('a:1,b:2,c:3',',',':'), + str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +{"a":"1","b":"2","c":"3"} {"a":"1","b":"2","c":"3"} +PREHOOK: query: select + substr(c2, 1, 3), + substr(c4, 1, 3), + substr(c2, 1, 3) = substr(c4, 1, 3) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + substr(c2, 1, 3), + substr(c4, 1, 3), + substr(c2, 1, 3) = substr(c4, 1, 3) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +val val true +PREHOOK: query: select + trim(c2), + trim(c4), + trim(c2) = trim(c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + trim(c2), + trim(c4), + trim(c2) = trim(c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +val_238 val_238 true +PREHOOK: query: -- Aggregate Functions +select + compute_stats(c2, 16), + compute_stats(c4, 16) +from varchar_udf_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: -- Aggregate Functions +select + compute_stats(c2, 16), + compute_stats(c4, 16) +from varchar_udf_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} +PREHOOK: query: select + min(c2), + min(c4) +from varchar_udf_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + min(c2), + min(c4) +from varchar_udf_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +val_238 val_238 +PREHOOK: query: select + max(c2), + max(c4) +from varchar_udf_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + max(c2), + max(c4) +from varchar_udf_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +val_238 val_238 +PREHOOK: query: drop table varchar_udf_1 
+PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_udf_1 +PREHOOK: Output: default@varchar_udf_1 +POSTHOOK: query: drop table varchar_udf_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_udf_1 +POSTHOOK: Output: default@varchar_udf_1 diff --git ql/src/test/results/clientpositive/vector_cast_constant.q.java1.7.out ql/src/test/results/clientpositive/vector_cast_constant.q.java1.7.out deleted file mode 100644 index 867dd4c96b9cd88a1fe6369f6307ab2d95ce5a84..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/vector_cast_constant.q.java1.7.out +++ /dev/null @@ -1,220 +0,0 @@ -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE over1korc -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE over1korc -POSTHOOK: type: DROPTABLE -PREHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1k -POSTHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over1k -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@over1k -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@over1k -PREHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1korc -POSTHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over1korc -PREHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -PREHOOK: type: QUERY -PREHOOK: Input: default@over1k -PREHOOK: Output: default@over1korc -POSTHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1k -POSTHOOK: Output: default@over1korc -POSTHOOK: Lineage: over1korc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ] -POSTHOOK: Lineage: over1korc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ] -POSTHOOK: Lineage: over1korc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ] -POSTHOOK: Lineage: over1korc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] -POSTHOOK: Lineage: over1korc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] -POSTHOOK: Lineage: over1korc.f SIMPLE 
[(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ] -POSTHOOK: Lineage: over1korc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ] -POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ] -POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] -POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] -POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(50), avg(50.0), avg(50) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 2960 Basic stats: 
COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@over1korc -#### A masked pattern was here #### -POSTHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i ORDER BY i LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1korc -#### A masked pattern was here #### -65536 50.0 50.0 50.0000 -65537 50.0 50.0 50.0000 -65538 50.0 50.0 50.0000 -65539 50.0 50.0 50.0000 -65540 50.0 50.0 50.0000 -65541 50.0 50.0 50.0000 -65542 50.0 50.0 50.0000 -65543 50.0 50.0 50.0000 -65544 50.0 50.0 50.0000 -65545 50.0 50.0 50.0000 diff --git ql/src/test/results/clientpositive/vector_cast_constant.q.java1.8.out ql/src/test/results/clientpositive/vector_cast_constant.q.java1.8.out deleted file mode 100644 index 789e6c2b82ffa473f2c8fd29ee9568f83243c2bf..0000000000000000000000000000000000000000 --- ql/src/test/results/clientpositive/vector_cast_constant.q.java1.8.out +++ /dev/null @@ -1,197 +0,0 @@ -PREHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- JAVA_VERSION_SPECIFIC_OUTPUT - -DROP TABLE over1k -POSTHOOK: type: DROPTABLE -PREHOOK: query: DROP TABLE over1korc -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE over1korc -POSTHOOK: type: DROPTABLE -PREHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1k -POSTHOOK: query: -- data setup -CREATE TABLE over1k(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' -STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@over1k -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@over1k -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@over1k -PREHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@over1korc -POSTHOOK: query: CREATE TABLE over1korc(t tinyint, - si smallint, - i int, - b bigint, - f float, - d double, - bo boolean, - s string, - ts timestamp, - dec decimal(4,2), - bin binary) -STORED AS ORC -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: 
Output: default@over1korc -PREHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -PREHOOK: type: QUERY -PREHOOK: Input: default@over1k -PREHOOK: Output: default@over1korc -POSTHOOK: query: INSERT INTO TABLE over1korc SELECT * FROM over1k -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1k -POSTHOOK: Output: default@over1korc -POSTHOOK: Lineage: over1korc.b SIMPLE [(over1k)over1k.FieldSchema(name:b, type:bigint, comment:null), ] -POSTHOOK: Lineage: over1korc.bin SIMPLE [(over1k)over1k.FieldSchema(name:bin, type:binary, comment:null), ] -POSTHOOK: Lineage: over1korc.bo SIMPLE [(over1k)over1k.FieldSchema(name:bo, type:boolean, comment:null), ] -POSTHOOK: Lineage: over1korc.d SIMPLE [(over1k)over1k.FieldSchema(name:d, type:double, comment:null), ] -POSTHOOK: Lineage: over1korc.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] -POSTHOOK: Lineage: over1korc.f SIMPLE [(over1k)over1k.FieldSchema(name:f, type:float, comment:null), ] -POSTHOOK: Lineage: over1korc.i SIMPLE [(over1k)over1k.FieldSchema(name:i, type:int, comment:null), ] -POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:string, comment:null), ] -POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] -POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] -POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(50), avg(50.0), avg(50) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@over1korc -#### A masked pattern was here #### -POSTHOOK: query: SELECT - i, - AVG(CAST(50 AS INT)) AS `avg_int_ok`, - AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, - AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@over1korc -#### A masked pattern was here #### -65536 50.0 50.0 50 -65537 50.0 50.0 50 -65538 50.0 50.0 50 -65539 50.0 50.0 50 -65540 50.0 50.0 50 -65541 50.0 50.0 50 -65542 50.0 50.0 50 -65543 50.0 50.0 50 -65544 50.0 50.0 50 -65545 50.0 50.0 50 diff --git ql/src/test/results/clientpositive/vector_cast_constant.q.out ql/src/test/results/clientpositive/vector_cast_constant.q.out index 39ed1c85839c5b567bca603d335bf947b0a03b7f..6033aad249b242cd5aec6519e20e6de0e2a5fa52 100644 --- ql/src/test/results/clientpositive/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/vector_cast_constant.q.out @@ -102,18 +102,19 @@ PREHOOK: query: EXPLAIN SELECT AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -146,6 +147,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE @@ -168,7 +191,7 @@ PREHOOK: query: SELECT AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP 
BY i LIMIT 10 + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@over1korc #### A masked pattern was here #### @@ -177,17 +200,17 @@ POSTHOOK: query: SELECT AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` - FROM over1korc GROUP BY i LIMIT 10 + FROM over1korc GROUP BY i ORDER BY i LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@over1korc #### A masked pattern was here #### -65536 50.0 50.0 50 -65537 50.0 50.0 50 -65538 50.0 50.0 50 -65539 50.0 50.0 50 -65540 50.0 50.0 50 -65541 50.0 50.0 50 -65542 50.0 50.0 50 -65543 50.0 50.0 50 -65544 50.0 50.0 50 -65545 50.0 50.0 50 +65536 50.0 50.0 50.0000 +65537 50.0 50.0 50.0000 +65538 50.0 50.0 50.0000 +65539 50.0 50.0 50.0000 +65540 50.0 50.0 50.0000 +65541 50.0 50.0 50.0000 +65542 50.0 50.0 50.0000 +65543 50.0 50.0 50.0000 +65544 50.0 50.0 50.0000 +65545 50.0 50.0 50.0000
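
Context for the golden-file unification above: the deleted varchar_udf1.q.java1.7.out and varchar_udf1.q.java1.8.out differ only where output depends on the JVM — str_to_map renders {"b":"2","a":"1","c":"3"} under Java 7 but {"a":"1","b":"2","c":"3"} under Java 8, and compute_stats emits an ndvbitvector field in one version's output but not the other's. Likewise, vector_cast_constant.q gains ORDER BY i so the LIMIT 10 rows are deterministic, and AVG over DECIMAL now prints as 50.0000 rather than 50. The sketch below is illustrative only, not Hive code; the class name GoldenMapOutput and the TreeMap-based normalization are assumptions, chosen to show why map iteration order makes golden files JVM-specific and how sorted rendering would sidestep that.

    import java.util.HashMap;
    import java.util.Map;
    import java.util.TreeMap;

    // Illustrative sketch (not Hive code): HashMap iteration order is an
    // implementation detail that shifted between JDK 7 and JDK 8, which is
    // why the per-version .q.java1.7.out / .q.java1.8.out files above
    // disagree on the printed order of str_to_map results.
    public class GoldenMapOutput {
        public static void main(String[] args) {
            Map<String, String> raw = new HashMap<>();
            raw.put("b", "2");
            raw.put("a", "1");
            raw.put("c", "3");

            // Unspecified order: one JVM may print {b=2, a=1, c=3},
            // another {a=1, b=2, c=3}.
            System.out.println(raw);

            // Sorting the keys yields one stable rendering, so a single
            // golden file could serve every JDK.
            System.out.println(new TreeMap<>(raw)); // always {a=1, b=2, c=3}
        }
    }

The diff itself takes the simpler route: with only one supported JDK's behavior left to match, each test keeps a single expected output file instead of normalizing the output.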