diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 96a03f6..3556a11 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -1,53 +1,16 @@ # NOTE: files should be listed in alphabetical order -minimr.query.files=auto_sortmerge_join_16.q,\ - bucket4.q,\ - bucket5.q,\ - bucket6.q,\ - bucket_many.q,\ - bucket_num_reducers.q,\ - bucket_num_reducers2.q,\ - bucketizedhiveinputformat.q,\ - bucketmapjoin6.q,\ - bucketmapjoin7.q,\ - disable_merge_for_bucketing.q,\ - empty_dir_in_table.q,\ - exchgpartition2lel.q,\ - external_table_with_space_in_location_path.q,\ - file_with_header_footer.q,\ - groupby2.q,\ - import_exported_table.q,\ - index_bitmap3.q,\ - index_bitmap_auto.q,\ - infer_bucket_sort_bucketed_table.q,\ +minimr.query.files=infer_bucket_sort_map_operators.q,\ infer_bucket_sort_dyn_part.q,\ - infer_bucket_sort_map_operators.q,\ infer_bucket_sort_merge.q,\ - infer_bucket_sort_num_buckets.q,\ - infer_bucket_sort_reducers_power_two.q,\ - input16_cc.q,\ - insert_dir_distcp.q,\ - join1.q,\ - join_acid_non_acid.q,\ - leftsemijoin_mr.q,\ - list_bucket_dml_10.q,\ - load_fs2.q,\ - load_hdfs_file_with_space_in_the_name.q,\ - non_native_window_udf.q, \ - parallel_orderby.q,\ - quotedid_smb.q,\ - reduce_deduplicate.q,\ - remote_script.q,\ root_dir_external_table.q,\ - schemeAuthority.q,\ - schemeAuthority2.q,\ + parallel_orderby.q,\ + bucket_num_reducers.q,\ + udf_using.q,\ + infer_bucket_sort_reducers_power_two.q,\ + infer_bucket_sort_num_buckets.q,\ scriptfile1.q,\ - scriptfile1_win.q,\ - skewjoin_onesideskew.q,\ - table_nonprintable.q,\ - temp_table_external.q,\ - truncate_column_buckets.q,\ - uber_reduce.q,\ - udf_using.q + bucket_num_reducers2.q,\ + scriptfile1_win.q # These tests are disabled for minimr # ql_rewrite_gbtoidx.q,\ @@ -245,7 +208,6 @@ minillap.shared.query.files=acid_globallimit.q,\ script_env_var1.q,\ script_env_var2.q,\ script_pipe.q,\ - scriptfile1.q,\ selectDistinctStar.q,\ select_dummy_source.q,\ skewjoin.q,\ @@ -440,29 +402,57 @@ minillap.shared.query.files=acid_globallimit.q,\ minillap.query.files=acid_bucket_pruning.q,\ acid_vectorization_missing_cols.q,\ + auto_sortmerge_join_16.q,\ + bucket4.q,\ + bucket5.q,\ + bucket6.q,\ + bucket_many.q,\ bucket_map_join_tez1.q,\ bucket_map_join_tez2.q,\ + bucketizedhiveinputformat.q,\ + bucketmapjoin6.q,\ + bucketmapjoin7.q,\ bucketpruning1.q,\ constprog_dpp.q,\ + disable_merge_for_bucketing.q,\ dynamic_partition_pruning.q,\ dynamic_partition_pruning_2.q,\ + empty_dir_in_table.q,\ + exchgpartition2lel.q,\ explainuser_1.q,\ explainuser_2.q,\ explainuser_4.q,\ + external_table_with_space_in_location_path.q,\ + file_with_header_footer.q,\ + groupby2.q,\ hybridgrace_hashjoin_1.q,\ hybridgrace_hashjoin_2.q,\ + import_exported_table.q,\ + infer_bucket_sort_bucketed_table.q,\ + input16_cc.q,\ + insert_dir_distcp.q,\ + join1.q,\ + join_acid_non_acid.q,\ + leftsemijoin_mr.q,\ + list_bucket_dml_10.q,\ llap_nullscan.q,\ llap_udf.q,\ llapdecider.q,\ + load_fs2.q,\ + load_hdfs_file_with_space_in_the_name.q,\ lvj_mapjoin.q,\ mapjoin_decimal.q,\ mergejoin_3way.q,\ mrr.q,\ + non_native_window_udf.q,\ orc_llap.q,\ orc_llap_counters.q,\ orc_llap_counters1.q,\ orc_llap_nonvector.q,\ orc_ppd_basic.q,\ + quotedid_smb.q,\ + reduce_deduplicate.q,\ + remote_script.q,\ schema_evol_orc_acid_part.q,\ schema_evol_orc_acid_part_update.q,\ schema_evol_orc_acid_table.q,\ @@ -492,7 +482,11 @@ 
minillap.query.files=acid_bucket_pruning.q,\ schema_evol_text_vecrow_part_all_complex.q,\ schema_evol_text_vecrow_part_all_primitive.q,\ schema_evol_text_vecrow_table.q,\ + schemeAuthority.q,\ + schemeAuthority2.q,\ smb_cache.q,\ + table_nonprintable.q,\ + temp_table_external.q,\ tez_aggr_part_stats.q,\ tez_bmj_schema_evolution.q,\ tez_dml.q,\ @@ -520,6 +514,7 @@ minillap.query.files=acid_bucket_pruning.q,\ tez_union_view.q,\ tez_vector_dynpart_hashjoin_1.q,\ tez_vector_dynpart_hashjoin_2.q,\ + uber_reduce.q,\ vectorized_dynamic_partition_pruning.q,\ windowing_gby.q diff --git a/ql/src/test/queries/clientpositive/bucket5.q b/ql/src/test/queries/clientpositive/bucket5.q index 0b3bcc5..bf4fbb0 100644 --- a/ql/src/test/queries/clientpositive/bucket5.q +++ b/ql/src/test/queries/clientpositive/bucket5.q @@ -4,6 +4,7 @@ set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; set hive.exec.reducers.max = 1; set hive.merge.mapfiles = true; set hive.merge.mapredfiles = true; +set hive.merge.tezfiles = true; set mapred.reduce.tasks = 2; -- Tests that when a multi insert inserts into a bucketed table and a table which is not bucketed diff --git a/ql/src/test/results/clientpositive/index_bitmap3.q.out b/ql/src/test/results/clientpositive/index_bitmap3.q.out index dc51c77..bcf008d 100644 --- a/ql/src/test/results/clientpositive/index_bitmap3.q.out +++ b/ql/src/test/results/clientpositive/index_bitmap3.q.out @@ -113,35 +113,35 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default__src_src1_index__ - Statistics: Num rows: 500 Data size: 46311 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 48311 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((UDFToDouble(key) = 0.0) and _bucketname is not null and _offset is not null) (type: boolean) - Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: array) TableScan alias: default__src_src2_index__ - Statistics: Num rows: 500 Data size: 48311 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 50311 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((value = 'val_0') and _bucketname is not null and _offset is not null) (type: boolean) - Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 25155 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 25155 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 
(type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 25155 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: array) Reduce Operator Tree: Join Operator @@ -151,20 +151,20 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: bigint) 1 _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col5 - Statistics: Num rows: 275 Data size: 25470 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 26570 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (not EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(_col2,_col5))) (type: boolean) - Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 138 Data size: 13333 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 138 Data size: 13333 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: collect_set(_col1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 138 Data size: 13333 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -180,7 +180,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 138 Data size: 13333 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: array) Reduce Operator Tree: Group By Operator @@ -188,10 +188,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 69 Data size: 6666 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 69 Data size: 6666 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/index_bitmap_auto.q.out b/ql/src/test/results/clientpositive/index_bitmap_auto.q.out index 7546dd3..47f4e8b 100644 --- a/ql/src/test/results/clientpositive/index_bitmap_auto.q.out +++ b/ql/src/test/results/clientpositive/index_bitmap_auto.q.out @@ -132,35 +132,35 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default__src_src1_index__ - Statistics: Num rows: 500 Data size: 46311 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 48311 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((UDFToDouble(key) = 0.0) and _bucketname is not null and _offset is not null) (type: boolean) - Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _bucketname (type: string), _offset (type: 
bigint), _bitmaps (type: array) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: array) TableScan alias: default__src_src2_index__ - Statistics: Num rows: 500 Data size: 48311 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 50311 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((value = 'val_0') and _bucketname is not null and _offset is not null) (type: boolean) - Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 25155 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 25155 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 25155 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: array) Reduce Operator Tree: Join Operator @@ -170,20 +170,20 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: bigint) 1 _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col5 - Statistics: Num rows: 275 Data size: 25470 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 26570 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (not EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(_col2,_col5))) (type: boolean) - Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 138 Data size: 13333 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 138 Data size: 13333 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: collect_set(_col1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 138 Data size: 13333 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -199,7 +199,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 138 Data size: 13333 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: array) Reduce Operator Tree: Group By Operator @@ -207,10 +207,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: 
mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 69 Data size: 6666 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 69 Data size: 6666 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -262,10 +262,14 @@ POSTHOOK: Input: default@src 0 val_0 PREHOOK: query: SELECT key, value FROM src WHERE key=0 AND value = "val_0" PREHOOK: type: QUERY +PREHOOK: Input: default@default__src_src1_index__ +PREHOOK: Input: default@default__src_src2_index__ PREHOOK: Input: default@src #### A masked pattern was here #### POSTHOOK: query: SELECT key, value FROM src WHERE key=0 AND value = "val_0" POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__src_src1_index__ +POSTHOOK: Input: default@default__src_src2_index__ POSTHOOK: Input: default@src #### A masked pattern was here #### 0 val_0 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out deleted file mode 100644 index 5fc5e91..0000000 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out +++ /dev/null @@ -1,336 +0,0 @@ -PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer --- and populating that information in partitions' metadata, it also verifies that the --- number of reducers chosen will be a power of two - -CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@test_table -POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer --- and populating that information in partitions' metadata, it also verifies that the --- number of reducers chosen will be a power of two - -CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@test_table -PREHOOK: query: -- Test group by, should be bucketed and sorted by group by key -INSERT OVERWRITE TABLE test_table PARTITION (part = '1') -SELECT key, count(*) FROM src GROUP BY key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: -- Test group by, should be bucketed and sorted by group by key -INSERT OVERWRITE TABLE test_table PARTITION (part = '1') -SELECT key, count(*) FROM src GROUP BY key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table@part=1 -POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] -PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@test_table -POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@test_table -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -part string - -# Detailed Partition Information 
-Partition Value: [1] -Database: default -Table: test_table -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 - numRows 309 - rawDataSize 1482 - totalSize 1791 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: 4 -Bucket Columns: [key] -Sort Columns: [Order(col:key, order:1)] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: -- Test join, should be bucketed and sorted by join key -INSERT OVERWRITE TABLE test_table PARTITION (part = '1') -SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: -- Test join, should be bucketed and sorted by join key -INSERT OVERWRITE TABLE test_table PARTITION (part = '1') -SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table@part=1 -POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@test_table -POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@test_table -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -part string - -# Detailed Partition Information -Partition Value: [1] -Database: default -Table: test_table -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 - numRows 1028 - rawDataSize 10968 - totalSize 11996 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: 4 -Bucket Columns: [key] -Sort Columns: [Order(col:key, order:1)] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys -INSERT OVERWRITE TABLE test_table PARTITION (part = '1') -SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys -INSERT OVERWRITE TABLE test_table PARTITION (part = '1') -SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table@part=1 -POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: 
default@test_table -POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@test_table -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -part string - -# Detailed Partition Information -Partition Value: [1] -Database: default -Table: test_table -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 - numRows 1028 - rawDataSize 10968 - totalSize 11996 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: 4 -Bucket Columns: [key, value] -Sort Columns: [Order(col:key, order:1), Order(col:value, order:1)] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key -INSERT OVERWRITE TABLE test_table PARTITION (part = '1') -SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key -INSERT OVERWRITE TABLE test_table PARTITION (part = '1') -SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table@part=1 -POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@test_table -POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@test_table -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -part string - -# Detailed Partition Information -Partition Value: [1] -Database: default -Table: test_table -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 - numRows 2654 - rawDataSize 28466 - totalSize 31120 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: 4 -Bucket Columns: [key] -Sort Columns: [Order(col:key, order:1)] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key -INSERT OVERWRITE TABLE test_table PARTITION (part = '1') -SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key -INSERT OVERWRITE TABLE test_table PARTITION (part = '1') -SELECT 
a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table@part=1 -POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@test_table -POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@test_table -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -part string - -# Detailed Partition Information -Partition Value: [1] -Database: default -Table: test_table -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 16 - numRows 2654 - rawDataSize 28466 - totalSize 31120 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: 16 -Bucket Columns: [value] -Sort Columns: [Order(col:value, order:1)] -Storage Desc Params: - serialization.format 1 -PREHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the --- key of the outer group by -INSERT OVERWRITE TABLE test_table PARTITION (part = '1') -SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test_table@part=1 -POSTHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the --- key of the outer group by -INSERT OVERWRITE TABLE test_table PARTITION (part = '1') -SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table@part=1 -POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] -POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] -PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@test_table -POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@test_table -# col_name data_type comment - -key string -value string - -# Partition Information -# col_name data_type comment - -part string - -# Detailed Partition Information -Partition Value: [1] -Database: default -Table: test_table -#### A masked pattern was here #### -Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 1 - numRows 5 - rawDataSize 19 - totalSize 24 -#### A masked pattern was here #### - -# Storage Information -SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -InputFormat: org.apache.hadoop.mapred.TextInputFormat -OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat -Compressed: No -Num Buckets: 1 -Bucket Columns: [value] -Sort Columns: [Order(col:value, order:1)] -Storage Desc Params: - serialization.format 1 diff 
--git a/ql/src/test/results/clientpositive/llap/bucket5.q.out b/ql/src/test/results/clientpositive/llap/bucket5.q.out new file mode 100644 index 0000000..708d1a0 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/bucket5.q.out @@ -0,0 +1,586 @@ +PREHOOK: query: -- Tests that when a multi insert inserts into a bucketed table and a table which is not bucketed +-- the bucketed table is not merged and the table which is not bucketed is + +CREATE TABLE bucketed_table(key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucketed_table +POSTHOOK: query: -- Tests that when a multi insert inserts into a bucketed table and a table which is not bucketed +-- the bucketed table is not merged and the table which is not bucketed is + +CREATE TABLE bucketed_table(key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucketed_table +PREHOOK: query: CREATE TABLE unbucketed_table(key INT, value STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@unbucketed_table +POSTHOOK: query: CREATE TABLE unbucketed_table(key INT, value STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@unbucketed_table +PREHOOK: query: EXPLAIN EXTENDED +FROM src +INSERT OVERWRITE TABLE bucketed_table SELECT key, value +INSERT OVERWRITE TABLE unbucketed_table SELECT key, value cluster by key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +FROM src +INSERT OVERWRITE TABLE bucketed_table SELECT key, value +INSERT OVERWRITE TABLE unbucketed_table SELECT key, value cluster by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2, Stage-7, Stage-6, Stage-9 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-2 , consists of Stage-7, Stage-6, Stage-8 + Stage-7 + Stage-6 + Stage-8 + Stage-9 depends on stages: Stage-8 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: UDFToInteger(_col0) (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic 
stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.bucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct bucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketed_table + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: UDFToInteger(KEY.reducesinkkey0) (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + 
GlobalTableId: 2 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.unbucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct unbucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.unbucketed_table + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.bucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct bucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketed_table + + Stage: Stage-4 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-1 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.unbucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct unbucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.unbucketed_table + + Stage: Stage-5 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-10 + Conditional Operator + + Stage: Stage-7 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-6 + Tez +#### A masked pattern was here #### + Vertices: + File Merge + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here 
#### + name default.unbucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct unbucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.unbucketed_table + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10004 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.unbucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct unbucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.unbucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct unbucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.unbucketed_table + name: default.unbucketed_table + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-8 + Tez +#### A masked pattern was here #### + Vertices: + File Merge + Map Operator Tree: + TableScan + GatherStats: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.unbucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct unbucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.unbucketed_table + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -ext-10004 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was 
here #### + name default.unbucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct unbucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.unbucketed_table + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct unbucketed_table { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.unbucketed_table + name: default.unbucketed_table + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-9 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE bucketed_table SELECT key, value +INSERT OVERWRITE TABLE unbucketed_table SELECT key, value cluster by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@bucketed_table +PREHOOK: Output: default@unbucketed_table +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE bucketed_table SELECT key, value +INSERT OVERWRITE TABLE unbucketed_table SELECT key, value cluster by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bucketed_table +POSTHOOK: Output: default@unbucketed_table +POSTHOOK: Lineage: bucketed_table.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bucketed_table.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: unbucketed_table.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: unbucketed_table.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESC FORMATTED bucketed_table +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@bucketed_table +POSTHOOK: query: DESC FORMATTED bucketed_table +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@bucketed_table +# col_name data_type comment + +key int +value string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + SORTBUCKETCOLSPREFIX TRUE + numFiles 2 + numRows 500 + rawDataSize 5312 + totalSize 5812 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 2 +Bucket Columns: [key] +Sort Columns: [Order(col:key, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: SELECT * FROM bucketed_table TABLESAMPLE (BUCKET 1 OUT OF 2) s LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketed_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM 
bucketed_table TABLESAMPLE (BUCKET 1 OUT OF 2) s LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketed_table +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +2 val_2 +4 val_4 +8 val_8 +10 val_10 +12 val_12 +12 val_12 +18 val_18 +PREHOOK: query: SELECT * FROM bucketed_table TABLESAMPLE (BUCKET 2 OUT OF 2) s LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketed_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM bucketed_table TABLESAMPLE (BUCKET 2 OUT OF 2) s LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketed_table +#### A masked pattern was here #### +5 val_5 +5 val_5 +5 val_5 +9 val_9 +11 val_11 +15 val_15 +15 val_15 +17 val_17 +19 val_19 +27 val_27 +PREHOOK: query: -- Should be 2 (not merged) +SELECT COUNT(DISTINCT INPUT__FILE__NAME) FROM bucketed_table +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketed_table +#### A masked pattern was here #### +POSTHOOK: query: -- Should be 2 (not merged) +SELECT COUNT(DISTINCT INPUT__FILE__NAME) FROM bucketed_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketed_table +#### A masked pattern was here #### +2 +PREHOOK: query: -- Should be 1 (merged) +SELECT COUNT(DISTINCT INPUT__FILE__NAME) FROM unbucketed_table +PREHOOK: type: QUERY +PREHOOK: Input: default@unbucketed_table +#### A masked pattern was here #### +POSTHOOK: query: -- Should be 1 (merged) +SELECT COUNT(DISTINCT INPUT__FILE__NAME) FROM unbucketed_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@unbucketed_table +#### A masked pattern was here #### +1 diff --git a/ql/src/test/results/clientpositive/llap/bucket6.q.out b/ql/src/test/results/clientpositive/llap/bucket6.q.out new file mode 100644 index 0000000..20895f8 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/bucket6.q.out @@ -0,0 +1,203 @@ +PREHOOK: query: CREATE TABLE src_bucket(key STRING, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_bucket +POSTHOOK: query: CREATE TABLE src_bucket(key STRING, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_bucket +PREHOOK: query: explain +insert into table src_bucket select key,value from srcpart +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert into table src_bucket select key,value from srcpart +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + 
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_bucket + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_bucket + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert into table src_bucket select key,value from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@src_bucket +POSTHOOK: query: insert into table src_bucket select key,value from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@src_bucket +POSTHOOK: Lineage: src_bucket.key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_bucket.value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from src_bucket limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_bucket +#### A masked pattern was here #### +POSTHOOK: query: select * from src_bucket limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_bucket +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +105 val_105 +105 val_105 +105 val_105 +105 val_105 +11 val_11 +11 val_11 +11 val_11 +11 val_11 +114 val_114 +114 val_114 +114 val_114 +114 val_114 +116 val_116 +116 val_116 +116 val_116 +116 val_116 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +136 val_136 +136 val_136 +136 val_136 +136 val_136 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +143 val_143 +143 val_143 +143 val_143 +145 val_145 +145 val_145 +145 val_145 +145 val_145 +149 val_149 +149 val_149 +149 val_149 +149 val_149 diff --git 
a/ql/src/test/results/clientpositive/llap/bucket_many.q.out b/ql/src/test/results/clientpositive/llap/bucket_many.q.out new file mode 100644 index 0000000..8433022 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/bucket_many.q.out @@ -0,0 +1,223 @@ +PREHOOK: query: create table bucket_many(key int, value string) clustered by (key) into 256 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_many +POSTHOOK: query: create table bucket_many(key int, value string) clustered by (key) into 256 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_many +PREHOOK: query: explain extended +insert overwrite table bucket_many +select * from src +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucket_many +select * from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: 
default.src + Truncated Path -> Alias: + /src [src] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 16 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count 256 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.bucket_many + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct bucket_many { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_many + TotalFiles: 256 + GatherStats: true + MultiFileSpray: true + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count 256 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.bucket_many + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct bucket_many { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_many + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table bucket_many +select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@bucket_many +POSTHOOK: query: insert overwrite table bucket_many +select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bucket_many +POSTHOOK: Lineage: bucket_many.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bucket_many.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain +select * from bucket_many tablesample (bucket 1 out of 256) s +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from bucket_many tablesample (bucket 1 out of 256) s +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: s + Filter Operator + predicate: (((hash(key) & 2147483647) % 256) = 0) (type: boolean) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: select * from bucket_many tablesample (bucket 1 out of 256) s +PREHOOK: type: QUERY 
+PREHOOK: Input: default@bucket_many +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket_many tablesample (bucket 1 out of 256) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_many +#### A masked pattern was here #### +0 val_0 +0 val_0 +256 val_256 +256 val_256 +0 val_0 diff --git a/ql/src/test/results/clientpositive/llap/bucketizedhiveinputformat.q.out b/ql/src/test/results/clientpositive/llap/bucketizedhiveinputformat.q.out new file mode 100644 index 0000000..163e819 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/bucketizedhiveinputformat.q.out @@ -0,0 +1,112 @@ +PREHOOK: query: CREATE TABLE T1(name STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(name STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(name STRING) STORED AS SEQUENCEFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(name STRING) STORED AS SEQUENCEFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +Warning: Shuffle Join MERGEJOIN[16][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM ( +SELECT tmp1.name as name FROM ( + SELECT name, 'MMM' AS n FROM T1) tmp1 + JOIN (SELECT 'MMM' AS n FROM T1) tmp2 + JOIN (SELECT 'MMM' AS n FROM T1) tmp3 + ON tmp1.n = tmp2.n AND tmp1.n = tmp3.n) ttt LIMIT 5000000 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t2 +POSTHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM ( +SELECT tmp1.name as name FROM ( + SELECT name, 'MMM' AS n FROM T1) tmp1 + JOIN (SELECT 'MMM' AS n FROM T1) tmp2 + JOIN (SELECT 'MMM' AS n FROM T1) tmp3 + ON tmp1.n = tmp2.n AND tmp1.n = tmp3.n) ttt LIMIT 5000000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.name SIMPLE [(t1)t1.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE T3(name STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +POSTHOOK: query: CREATE TABLE T3(name STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t3 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv2.txt' INTO TABLE T3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv2.txt' INTO TABLE T3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t3 +PREHOOK: query: -- 2 split by 
max.split.size +SELECT COUNT(1) FROM T2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: -- 2 split by max.split.size +SELECT COUNT(1) FROM T2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +5000000 +PREHOOK: query: -- 1 split for two file +SELECT COUNT(1) FROM T3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: -- 1 split for two file +SELECT COUNT(1) FROM T3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +1000 +PREHOOK: query: -- 1 split +SELECT COUNT(1) FROM T2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: -- 1 split +SELECT COUNT(1) FROM T2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +5000000 +PREHOOK: query: -- 2 split for two file +SELECT COUNT(1) FROM T3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: -- 2 split for two file +SELECT COUNT(1) FROM T3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +1000 diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin6.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin6.q.out new file mode 100644 index 0000000..198404b --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin6.q.out @@ -0,0 +1,146 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table tmp1 (a string, b string) clustered by (a) sorted by (a) into 10 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table tmp1 (a string, b string) clustered by (a) sorted by (a) into 10 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp1 +PREHOOK: query: create table tmp2 (a string, b string) clustered by (a) sorted by (a) into 10 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp2 +POSTHOOK: query: create table tmp2 (a string, b string) clustered by (a) sorted by (a) into 10 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp2 +PREHOOK: query: insert overwrite table tmp1 select * from src where key < 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tmp1 +POSTHOOK: query: insert overwrite table tmp1 select * from src where key < 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tmp1 +POSTHOOK: Lineage: tmp1.a SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp1.b SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tmp2 select * from src where key < 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tmp2 +POSTHOOK: query: insert overwrite table tmp2 select * from src where key < 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tmp2 +POSTHOOK: Lineage: tmp2.a SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp2.b SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table tmp3 (a string, b string, c string) clustered by (a) sorted by (a) into 10 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: 
database:default +PREHOOK: Output: default@tmp3 +POSTHOOK: query: create table tmp3 (a string, b string, c string) clustered by (a) sorted by (a) into 10 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp3 +PREHOOK: query: insert overwrite table tmp3 + select /*+ MAPJOIN(l) */ i.a, i.b, l.b + from tmp1 i join tmp2 l ON i.a = l.a +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp1 +PREHOOK: Input: default@tmp2 +PREHOOK: Output: default@tmp3 +POSTHOOK: query: insert overwrite table tmp3 + select /*+ MAPJOIN(l) */ i.a, i.b, l.b + from tmp1 i join tmp2 l ON i.a = l.a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp1 +POSTHOOK: Input: default@tmp2 +POSTHOOK: Output: default@tmp3 +POSTHOOK: Lineage: tmp3.a SIMPLE [(tmp1)i.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: tmp3.b SIMPLE [(tmp1)i.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: tmp3.c SIMPLE [(tmp2)l.FieldSchema(name:b, type:string, comment:null), ] +PREHOOK: query: select * from tmp3 +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp3 +#### A masked pattern was here #### +POSTHOOK: query: select * from tmp3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp3 +#### A masked pattern was here #### +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +10 val_10 val_10 +11 val_11 val_11 +12 val_12 val_12 +12 val_12 val_12 +12 val_12 val_12 +12 val_12 val_12 +15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 +17 val_17 val_17 +18 val_18 val_18 +18 val_18 val_18 +18 val_18 val_18 +18 val_18 val_18 +19 val_19 val_19 +2 val_2 val_2 +20 val_20 val_20 +24 val_24 val_24 +24 val_24 val_24 +24 val_24 val_24 +24 val_24 val_24 +26 val_26 val_26 +26 val_26 val_26 +26 val_26 val_26 +26 val_26 val_26 +27 val_27 val_27 +28 val_28 val_28 +30 val_30 val_30 +33 val_33 val_33 +34 val_34 val_34 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +37 val_37 val_37 +37 val_37 val_37 +37 val_37 val_37 +37 val_37 val_37 +4 val_4 val_4 +41 val_41 val_41 +42 val_42 val_42 +42 val_42 val_42 +42 val_42 val_42 +42 val_42 val_42 +43 val_43 val_43 +44 val_44 val_44 +47 val_47 val_47 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +8 val_8 val_8 +9 val_9 val_9 diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin7.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin7.q.out new file mode 100644 index 0000000..b515af6 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin7.q.out @@ -0,0 +1,315 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (ds='2008-04-08', hr='0') +PREHOOK: type: LOAD +#### A 
masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (ds='2008-04-08', hr='0') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (ds='2008-04-08', hr='0') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (ds='2008-04-08', hr='0') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (ds='2008-04-08', hr='0') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (ds='2008-04-08', hr='0') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (ds='2008-04-08', hr='0') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (ds='2008-04-08', hr='0') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 +PREHOOK: query: -- Tests that bucket map join works with a table with more than one level of partitioning + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.ds = '2008-04-08' AND b.ds = '2008-04-08' +ORDER BY a.key, b.value LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Tests that bucket map join works with a table with more than one level of partitioning + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.ds = '2008-04-08' AND b.ds = '2008-04-08' +ORDER BY a.key, b.value LIMIT 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: 
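bucketmapjoin7 joins two tables that are bucketed on the join key and partitioned on (ds, hr); under Tez/LLAP the plan below compiles to a shuffle merge join, whereas the bucket map join optimization the test is named for belongs to the classic MapReduce path. A sketch of how that optimization is conventionally requested, assuming the standard hive.optimize.bucketmapjoin switch (the .q file's set commands are not echoed in this output); the query text is the one under test:

set hive.optimize.bucketmapjoin = true;  -- standard Hive setting, assumed here
SELECT /*+ MAPJOIN(b) */ a.key, b.value
FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
ON a.key = b.key AND a.ds = '2008-04-08' AND b.ds = '2008-04-08'
ORDER BY a.key, b.value LIMIT 1;

With both sides split into 2 buckets on key, a bucket map join lets the mapper reading bucket i of a hash only bucket i of b, rather than the whole small table.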
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 687 Data size: 8246 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 687 Data size: 8246 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 687 Data size: 8246 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=0 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 0 + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/ds=2008-04-08/hr=0 [a] + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 26 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=0 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 0 + properties: + bucket_count 
2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/ds=2008-04-08/hr=0 [b] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col8 + Position of Big Table: 0 + Statistics: Num rows: 755 Data size: 9070 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 755 Data size: 9070 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: aa + sort order: ++ + Statistics: Num rows: 755 Data size: 9070 Basic stats: COMPLETE Column stats: NONE + tag: -1 + TopN: 1 + TopN Hash Memory Usage: 0.1 + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 755 Data size: 9070 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM 
srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.ds = '2008-04-08' AND b.ds = '2008-04-08' +ORDER BY a.key, b.value LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.ds = '2008-04-08' AND b.ds = '2008-04-08' +ORDER BY a.key, b.value LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 +#### A masked pattern was here #### +0 val_0 diff --git a/ql/src/test/results/clientpositive/llap/empty_dir_in_table.q.out b/ql/src/test/results/clientpositive/llap/empty_dir_in_table.q.out new file mode 100644 index 0000000..e804de3 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/empty_dir_in_table.q.out @@ -0,0 +1,46 @@ +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@roottable +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@roottable +PREHOOK: query: select count(*) from roottable +PREHOOK: type: QUERY +PREHOOK: Input: default@roottable +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from roottable +POSTHOOK: type: QUERY +POSTHOOK: Input: default@roottable +#### A masked pattern was here #### +0 +PREHOOK: query: insert into table roottable select key from src where (key < 20) order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@roottable +POSTHOOK: query: insert into table roottable select key from src where (key < 20) order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@roottable +POSTHOOK: Lineage: roottable.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: select count(*) from roottable +PREHOOK: type: QUERY +PREHOOK: Input: default@roottable +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from roottable +POSTHOOK: type: QUERY +POSTHOOK: Input: default@roottable +#### A masked pattern was here #### +20 +PREHOOK: query: select count(*) from roottable +PREHOOK: type: QUERY +PREHOOK: Input: default@roottable +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from roottable +POSTHOOK: type: QUERY +POSTHOOK: Input: default@roottable +#### A masked pattern was here #### +20 diff --git a/ql/src/test/results/clientpositive/llap/exchgpartition2lel.q.out b/ql/src/test/results/clientpositive/llap/exchgpartition2lel.q.out new file mode 100644 index 0000000..69d6d88 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/exchgpartition2lel.q.out @@ -0,0 +1,203 @@ +PREHOOK: query: DROP TABLE IF EXISTS t1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS t1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS t2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS t2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: 
DROP TABLE IF EXISTS t3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS t3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS t4 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS t4 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE t1 (a int) PARTITIONED BY (d1 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: CREATE TABLE t1 (a int) PARTITIONED BY (d1 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE t2 (a int) PARTITIONED BY (d1 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: CREATE TABLE t2 (a int) PARTITIONED BY (d1 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: CREATE TABLE t3 (a int) PARTITIONED BY (d1 int, d2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t3 +POSTHOOK: query: CREATE TABLE t3 (a int) PARTITIONED BY (d1 int, d2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t3 +PREHOOK: query: CREATE TABLE t4 (a int) PARTITIONED BY (d1 int, d2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t4 +POSTHOOK: query: CREATE TABLE t4 (a int) PARTITIONED BY (d1 int, d2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t4 +PREHOOK: query: CREATE TABLE t5 (a int) PARTITIONED BY (d1 int, d2 int, d3 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t5 +POSTHOOK: query: CREATE TABLE t5 (a int) PARTITIONED BY (d1 int, d2 int, d3 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t5 +PREHOOK: query: CREATE TABLE t6 (a int) PARTITIONED BY (d1 int, d2 int, d3 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t6 +POSTHOOK: query: CREATE TABLE t6 (a int) PARTITIONED BY (d1 int, d2 int, d3 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t6 +PREHOOK: query: INSERT OVERWRITE TABLE t1 PARTITION (d1 = 1) SELECT key FROM src where key = 100 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t1@d1=1 +POSTHOOK: query: INSERT OVERWRITE TABLE t1 PARTITION (d1 = 1) SELECT key FROM src where key = 100 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t1@d1=1 +POSTHOOK: Lineage: t1 PARTITION(d1=1).a EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE t3 PARTITION (d1 = 1, d2 = 1) SELECT key FROM src where key = 100 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t3@d1=1/d2=1 +POSTHOOK: query: INSERT OVERWRITE TABLE t3 PARTITION (d1 = 1, d2 = 1) SELECT key FROM src where key = 100 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t3@d1=1/d2=1 +POSTHOOK: Lineage: t3 PARTITION(d1=1,d2=1).a EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE t5 PARTITION (d1 = 1, d2 = 1, d3=1) SELECT key FROM src where key = 100 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t5@d1=1/d2=1/d3=1 +POSTHOOK: query: INSERT OVERWRITE TABLE t5 
PARTITION (d1 = 1, d2 = 1, d3=1) SELECT key FROM src where key = 100 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t5@d1=1/d2=1/d3=1 +POSTHOOK: Lineage: t5 PARTITION(d1=1,d2=1,d3=1).a EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT * FROM t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@d1=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@d1=1 +#### A masked pattern was here #### +100 1 +PREHOOK: query: SELECT * FROM t3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t3 +PREHOOK: Input: default@t3@d1=1/d2=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t3 +POSTHOOK: Input: default@t3@d1=1/d2=1 +#### A masked pattern was here #### +100 1 1 +PREHOOK: query: ALTER TABLE t2 EXCHANGE PARTITION (d1 = 1) WITH TABLE t1 +PREHOOK: type: ALTERTABLE_EXCHANGEPARTITION +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t2 +POSTHOOK: query: ALTER TABLE t2 EXCHANGE PARTITION (d1 = 1) WITH TABLE t1 +POSTHOOK: type: ALTERTABLE_EXCHANGEPARTITION +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@d1=1 +POSTHOOK: Output: default@t1@d1=1 +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t2@d1=1 +PREHOOK: query: SELECT * FROM t1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +PREHOOK: query: SELECT * FROM t2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t2@d1=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t2@d1=1 +#### A masked pattern was here #### +100 1 +PREHOOK: query: ALTER TABLE t4 EXCHANGE PARTITION (d1 = 1, d2 = 1) WITH TABLE t3 +PREHOOK: type: ALTERTABLE_EXCHANGEPARTITION +PREHOOK: Input: default@t3 +PREHOOK: Output: default@t4 +POSTHOOK: query: ALTER TABLE t4 EXCHANGE PARTITION (d1 = 1, d2 = 1) WITH TABLE t3 +POSTHOOK: type: ALTERTABLE_EXCHANGEPARTITION +POSTHOOK: Input: default@t3 +POSTHOOK: Input: default@t3@d1=1/d2=1 +POSTHOOK: Output: default@t3@d1=1/d2=1 +POSTHOOK: Output: default@t4 +POSTHOOK: Output: default@t4@d1=1/d2=1 +PREHOOK: query: SELECT * FROM t3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +PREHOOK: query: SELECT * FROM t4 +PREHOOK: type: QUERY +PREHOOK: Input: default@t4 +PREHOOK: Input: default@t4@d1=1/d2=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t4 +POSTHOOK: Input: default@t4@d1=1/d2=1 +#### A masked pattern was here #### +100 1 1 +PREHOOK: query: ALTER TABLE t6 EXCHANGE PARTITION (d1 = 1, d2 = 1, d3 = 1) WITH TABLE t5 +PREHOOK: type: ALTERTABLE_EXCHANGEPARTITION +PREHOOK: Input: default@t5 +PREHOOK: Output: default@t6 +POSTHOOK: query: ALTER TABLE t6 EXCHANGE PARTITION (d1 = 1, d2 = 1, d3 = 1) WITH TABLE t5 +POSTHOOK: type: ALTERTABLE_EXCHANGEPARTITION +POSTHOOK: Input: default@t5 +POSTHOOK: Input: default@t5@d1=1/d2=1/d3=1 +POSTHOOK: Output: default@t5@d1=1/d2=1/d3=1 +POSTHOOK: Output: default@t6 +POSTHOOK: Output: 
default@t6@d1=1/d2=1/d3=1 +PREHOOK: query: SELECT * FROM t5 +PREHOOK: type: QUERY +PREHOOK: Input: default@t5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t5 +#### A masked pattern was here #### +PREHOOK: query: SELECT * FROM t6 +PREHOOK: type: QUERY +PREHOOK: Input: default@t6 +PREHOOK: Input: default@t6@d1=1/d2=1/d3=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t6 +POSTHOOK: Input: default@t6@d1=1/d2=1/d3=1 +#### A masked pattern was here #### +100 1 1 1 diff --git a/ql/src/test/results/clientpositive/llap/external_table_with_space_in_location_path.q.out b/ql/src/test/results/clientpositive/llap/external_table_with_space_in_location_path.q.out new file mode 100644 index 0000000..831e339 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/external_table_with_space_in_location_path.q.out @@ -0,0 +1,88 @@ +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@spacetest +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@spacetest +PREHOOK: query: SELECT * FROM spacetest +PREHOOK: type: QUERY +PREHOOK: Input: default@spacetest +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM spacetest +POSTHOOK: type: QUERY +POSTHOOK: Input: default@spacetest +#### A masked pattern was here #### +12 jason +13 steven +15 joe +PREHOOK: query: SELECT count(*) FROM spacetest +PREHOOK: type: QUERY +PREHOOK: Input: default@spacetest +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) FROM spacetest +POSTHOOK: type: QUERY +POSTHOOK: Input: default@spacetest +#### A masked pattern was here #### +3 +PREHOOK: query: DROP TABLE spacetest +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@spacetest +PREHOOK: Output: default@spacetest +POSTHOOK: query: DROP TABLE spacetest +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@spacetest +POSTHOOK: Output: default@spacetest +PREHOOK: query: CREATE EXTERNAL TABLE spacetestpartition (id int, message string) PARTITIONED BY (day int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@spacetestpartition +POSTHOOK: query: CREATE EXTERNAL TABLE spacetestpartition (id int, message string) PARTITIONED BY (day int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@spacetestpartition +#### A masked pattern was here #### +PREHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +PREHOOK: Output: default@spacetestpartition +#### A masked pattern was here #### +POSTHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +POSTHOOK: Output: default@spacetestpartition +POSTHOOK: Output: default@spacetestpartition@day=10 +PREHOOK: query: SELECT * FROM spacetestpartition +PREHOOK: type: QUERY +PREHOOK: Input: default@spacetestpartition +PREHOOK: Input: default@spacetestpartition@day=10 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM spacetestpartition +POSTHOOK: type: QUERY +POSTHOOK: Input: default@spacetestpartition +POSTHOOK: Input: default@spacetestpartition@day=10 +#### A masked pattern was here #### +12 jason 10 +13 steven 10 +15 joe 10 +PREHOOK: query: 
SELECT count(*) FROM spacetestpartition +PREHOOK: type: QUERY +PREHOOK: Input: default@spacetestpartition +PREHOOK: Input: default@spacetestpartition@day=10 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) FROM spacetestpartition +POSTHOOK: type: QUERY +POSTHOOK: Input: default@spacetestpartition +POSTHOOK: Input: default@spacetestpartition@day=10 +#### A masked pattern was here #### +3 +PREHOOK: query: DROP TABLE spacetestpartition +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@spacetestpartition +PREHOOK: Output: default@spacetestpartition +POSTHOOK: query: DROP TABLE spacetestpartition +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@spacetestpartition +POSTHOOK: Output: default@spacetestpartition +#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/file_with_header_footer.q.out b/ql/src/test/results/clientpositive/llap/file_with_header_footer.q.out new file mode 100644 index 0000000..ca3dadb --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/file_with_header_footer.q.out @@ -0,0 +1,184 @@ +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@header_footer_table_1 +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@header_footer_table_1 +PREHOOK: query: SELECT * FROM header_footer_table_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM header_footer_table_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_1 +#### A masked pattern was here #### +steven hive 1 +dave oozie 2 +xifa phd 3 +chuan hadoop 4 +shanyu senior 5 +steven2 hive 11 +dave2 oozie 12 +xifa2 phd 13 +chuan2 hadoop 14 +shanyu2 senior 15 +david3 oozie 22 +PREHOOK: query: SELECT * FROM header_footer_table_1 WHERE id < 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM header_footer_table_1 WHERE id < 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_1 +#### A masked pattern was here #### +steven hive 1 +dave oozie 2 +xifa phd 3 +chuan hadoop 4 +shanyu senior 5 +steven2 hive 11 +dave2 oozie 12 +xifa2 phd 13 +chuan2 hadoop 14 +shanyu2 senior 15 +david3 oozie 22 +PREHOOK: query: CREATE EXTERNAL TABLE header_footer_table_2 (name string, message string, id int) PARTITIONED BY (year int, month int, day int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' tblproperties ("skip.header.line.count"="1", "skip.footer.line.count"="2") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@header_footer_table_2 +POSTHOOK: query: CREATE EXTERNAL TABLE header_footer_table_2 (name string, message string, id int) PARTITIONED BY (year int, month int, day int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' tblproperties ("skip.header.line.count"="1", "skip.footer.line.count"="2") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@header_footer_table_2 +#### A masked pattern was here #### +PREHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +PREHOOK: Output: default@header_footer_table_2 +#### A masked pattern was here #### +POSTHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +POSTHOOK: Output: default@header_footer_table_2 
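header_footer_table_2 above leans on two table properties, skip.header.line.count and skip.footer.line.count, which the text record reader applies per data file rather than per table. A minimal standalone sketch; the table name, columns, and path are illustrative, and only the two properties are the feature under test:

CREATE EXTERNAL TABLE header_footer_demo (name STRING, message STRING, id INT)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/tmp/header_footer_demo'               -- hypothetical path
TBLPROPERTIES ("skip.header.line.count"="1",     -- drop the first line of each file
               "skip.footer.line.count"="2");    -- drop the last two lines of each file

Given a data file of N lines, queries then see lines 2 through N-2, which is why only the data rows appear in the surrounding SELECT results.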
+POSTHOOK: Output: default@header_footer_table_2@year=2012/month=1/day=1 +#### A masked pattern was here #### +PREHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +PREHOOK: Output: default@header_footer_table_2 +#### A masked pattern was here #### +POSTHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +POSTHOOK: Output: default@header_footer_table_2 +POSTHOOK: Output: default@header_footer_table_2@year=2012/month=1/day=2 +#### A masked pattern was here #### +PREHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +PREHOOK: Output: default@header_footer_table_2 +#### A masked pattern was here #### +POSTHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +POSTHOOK: Output: default@header_footer_table_2 +POSTHOOK: Output: default@header_footer_table_2@year=2012/month=1/day=3 +PREHOOK: query: SELECT * FROM header_footer_table_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_2 +PREHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=1 +PREHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=2 +PREHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM header_footer_table_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_2 +POSTHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=1 +POSTHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=2 +POSTHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=3 +#### A masked pattern was here #### +steven hive 1 2012 1 1 +dave oozie 2 2012 1 1 +xifa phd 3 2012 1 1 +chuan hadoop 4 2012 1 1 +shanyu senior 5 2012 1 1 +steven2 hive 11 2012 1 2 +dave2 oozie 12 2012 1 2 +xifa2 phd 13 2012 1 2 +chuan2 hadoop 14 2012 1 2 +shanyu2 senior 15 2012 1 2 +david3 oozie 22 2012 1 3 +PREHOOK: query: SELECT * FROM header_footer_table_2 WHERE id < 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@header_footer_table_2 +PREHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=1 +PREHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=2 +PREHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM header_footer_table_2 WHERE id < 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@header_footer_table_2 +POSTHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=1 +POSTHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=2 +POSTHOOK: Input: default@header_footer_table_2@year=2012/month=1/day=3 +#### A masked pattern was here #### +steven hive 1 2012 1 1 +dave oozie 2 2012 1 1 +xifa phd 3 2012 1 1 +chuan hadoop 4 2012 1 1 +shanyu senior 5 2012 1 1 +steven2 hive 11 2012 1 2 +dave2 oozie 12 2012 1 2 +xifa2 phd 13 2012 1 2 +chuan2 hadoop 14 2012 1 2 +shanyu2 senior 15 2012 1 2 +david3 oozie 22 2012 1 3 +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@emptytable +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emptytable +PREHOOK: query: SELECT * FROM emptytable +PREHOOK: type: QUERY +PREHOOK: Input: default@emptytable +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM emptytable +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emptytable +#### A masked pattern was here 
#### +PREHOOK: query: SELECT * FROM emptytable WHERE id < 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@emptytable +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM emptytable WHERE id < 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emptytable +#### A masked pattern was here #### +PREHOOK: query: DROP TABLE header_footer_table_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@header_footer_table_1 +PREHOOK: Output: default@header_footer_table_1 +POSTHOOK: query: DROP TABLE header_footer_table_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@header_footer_table_1 +POSTHOOK: Output: default@header_footer_table_1 +PREHOOK: query: DROP TABLE header_footer_table_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@header_footer_table_2 +PREHOOK: Output: default@header_footer_table_2 +POSTHOOK: query: DROP TABLE header_footer_table_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@header_footer_table_2 +POSTHOOK: Output: default@header_footer_table_2 +PREHOOK: query: DROP TABLE emptytable +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@emptytable +PREHOOK: Output: default@emptytable +POSTHOOK: query: DROP TABLE emptytable +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@emptytable +POSTHOOK: Output: default@emptytable +#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/import_exported_table.q.out b/ql/src/test/results/clientpositive/llap/import_exported_table.q.out new file mode 100644 index 0000000..65d7480 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/import_exported_table.q.out @@ -0,0 +1,28 @@ +#### A masked pattern was here #### +PREHOOK: type: IMPORT +#### A masked pattern was here #### +PREHOOK: Output: database:default +#### A masked pattern was here #### +POSTHOOK: type: IMPORT +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@j1_41 +PREHOOK: query: DESCRIBE j1_41 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@j1_41 +POSTHOOK: query: DESCRIBE j1_41 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@j1_41 +a string +b int +PREHOOK: query: SELECT * from j1_41 +PREHOOK: type: QUERY +PREHOOK: Input: default@j1_41 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from j1_41 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@j1_41 +#### A masked pattern was here #### +johndee 1 +burks 2 +#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out new file mode 100644 index 0000000..33d795b --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out @@ -0,0 +1,121 @@ +PREHOOK: query: -- Test writing to a bucketed table, the output should be bucketed by the bucketing key into the +-- a number of files equal to the number of buckets +CREATE TABLE test_table_bucketed (key STRING, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (value) SORTED BY (value) INTO 3 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table_bucketed +POSTHOOK: query: -- Test writing to a bucketed table, the output should be bucketed by the bucketing key into the +-- a number of files equal to the number of buckets +CREATE TABLE test_table_bucketed (key STRING, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (value) SORTED BY (value) INTO 3 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@test_table_bucketed +PREHOOK: query: -- Despite the fact that normally inferring would say this table is bucketed and sorted on key, +-- this should be bucketed and sorted by value into 3 buckets +INSERT OVERWRITE TABLE test_table_bucketed PARTITION (part = '1') +SELECT key, count(1) FROM src GROUP BY KEY +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table_bucketed@part=1 +POSTHOOK: query: -- Despite the fact that normally inferring would say this table is bucketed and sorted on key, +-- this should be bucketed and sorted by value into 3 buckets +INSERT OVERWRITE TABLE test_table_bucketed PARTITION (part = '1') +SELECT key, count(1) FROM src GROUP BY KEY +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table_bucketed@part=1 +POSTHOOK: Lineage: test_table_bucketed PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table_bucketed PARTITION(part=1).value EXPRESSION [(src)src.null, ] +PREHOOK: query: DESCRIBE FORMATTED test_table_bucketed PARTITION (part = '1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@test_table_bucketed +POSTHOOK: query: DESCRIBE FORMATTED test_table_bucketed PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@test_table_bucketed +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +part string + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table_bucketed +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 3 + numRows 309 + rawDataSize 1482 + totalSize 1791 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: 3 +Bucket Columns: [value] +Sort Columns: [Order(col:value, order:1)] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- If the count(*) from sampling the buckets matches the count(*) from each file, the table is +-- bucketed +SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 1 OUT OF 3) WHERE part = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table_bucketed +PREHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +POSTHOOK: query: -- If the count(*) from sampling the buckets matches the count(*) from each file, the table is +-- bucketed +SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 1 OUT OF 3) WHERE part = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table_bucketed +POSTHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +31 +PREHOOK: query: SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 2 OUT OF 3) WHERE part = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table_bucketed +PREHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 2 OUT OF 3) WHERE part = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table_bucketed +POSTHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +179 +PREHOOK: query: SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE 
(BUCKET 3 OUT OF 3) WHERE part = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table_bucketed +PREHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 3 OUT OF 3) WHERE part = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table_bucketed +POSTHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +99 +PREHOOK: query: SELECT cnt FROM (SELECT INPUT__FILE__NAME, COUNT(*) cnt FROM test_table_bucketed WHERE part = '1' +GROUP BY INPUT__FILE__NAME ORDER BY INPUT__FILE__NAME ASC LIMIT 3) a +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table_bucketed +PREHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT cnt FROM (SELECT INPUT__FILE__NAME, COUNT(*) cnt FROM test_table_bucketed WHERE part = '1' +GROUP BY INPUT__FILE__NAME ORDER BY INPUT__FILE__NAME ASC LIMIT 3) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table_bucketed +POSTHOOK: Input: default@test_table_bucketed@part=1 +#### A masked pattern was here #### +31 +179 +99 diff --git a/ql/src/test/results/clientpositive/llap/input16_cc.q.out b/ql/src/test/results/clientpositive/llap/input16_cc.q.out new file mode 100644 index 0000000..9ea3472 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/input16_cc.q.out @@ -0,0 +1,534 @@ +PREHOOK: query: -- TestSerDe is a user defined serde where the default delimiter is Ctrl-B +-- the user is overwriting it with ctrlC + +DROP TABLE INPUT16_CC +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- TestSerDe is a user defined serde where the default delimiter is Ctrl-B +-- the user is overwriting it with ctrlC + +DROP TABLE INPUT16_CC +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE INPUT16_CC(KEY STRING, VALUE STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.TestSerDe' with serdeproperties ('testserde.default.serialization.format'='\003', 'dummy.prop.not.used'='dummyy.val') STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@INPUT16_CC +POSTHOOK: query: CREATE TABLE INPUT16_CC(KEY STRING, VALUE STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.TestSerDe' with serdeproperties ('testserde.default.serialization.format'='\003', 'dummy.prop.not.used'='dummyy.val') STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@INPUT16_CC +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1_cc.txt' INTO TABLE INPUT16_CC +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@input16_cc +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1_cc.txt' INTO TABLE INPUT16_CC +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@input16_cc +PREHOOK: query: SELECT INPUT16_CC.VALUE, INPUT16_CC.KEY FROM INPUT16_CC +PREHOOK: type: QUERY +PREHOOK: Input: default@input16_cc +#### A masked pattern was here #### +POSTHOOK: query: SELECT INPUT16_CC.VALUE, INPUT16_CC.KEY FROM INPUT16_CC +POSTHOOK: type: QUERY +POSTHOOK: Input: default@input16_cc +#### A masked pattern was here #### +val_238 238 +val_86 86 +val_311 311 +val_27 27 +val_165 165 +val_409 409 +val_255 255 +val_278 278 +val_98 98 +val_484 484 +val_265 265 +val_193 193 +val_401 401 +val_150 150 +val_273 273 +val_224 224 +val_369 369 +val_66 66 +val_128 128 +val_213 213 +val_146 146 +val_406 406 +val_429 429 +val_374 374 +val_152 152 +val_469 
469 +val_145 145 +val_495 495 +val_37 37 +val_327 327 +val_281 281 +val_277 277 +val_209 209 +val_15 15 +val_82 82 +val_403 403 +val_166 166 +val_417 417 +val_430 430 +val_252 252 +val_292 292 +val_219 219 +val_287 287 +val_153 153 +val_193 193 +val_338 338 +val_446 446 +val_459 459 +val_394 394 +val_237 237 +val_482 482 +val_174 174 +val_413 413 +val_494 494 +val_207 207 +val_199 199 +val_466 466 +val_208 208 +val_174 174 +val_399 399 +val_396 396 +val_247 247 +val_417 417 +val_489 489 +val_162 162 +val_377 377 +val_397 397 +val_309 309 +val_365 365 +val_266 266 +val_439 439 +val_342 342 +val_367 367 +val_325 325 +val_167 167 +val_195 195 +val_475 475 +val_17 17 +val_113 113 +val_155 155 +val_203 203 +val_339 339 +val_0 0 +val_455 455 +val_128 128 +val_311 311 +val_316 316 +val_57 57 +val_302 302 +val_205 205 +val_149 149 +val_438 438 +val_345 345 +val_129 129 +val_170 170 +val_20 20 +val_489 489 +val_157 157 +val_378 378 +val_221 221 +val_92 92 +val_111 111 +val_47 47 +val_72 72 +val_4 4 +val_280 280 +val_35 35 +val_427 427 +val_277 277 +val_208 208 +val_356 356 +val_399 399 +val_169 169 +val_382 382 +val_498 498 +val_125 125 +val_386 386 +val_437 437 +val_469 469 +val_192 192 +val_286 286 +val_187 187 +val_176 176 +val_54 54 +val_459 459 +val_51 51 +val_138 138 +val_103 103 +val_239 239 +val_213 213 +val_216 216 +val_430 430 +val_278 278 +val_176 176 +val_289 289 +val_221 221 +val_65 65 +val_318 318 +val_332 332 +val_311 311 +val_275 275 +val_137 137 +val_241 241 +val_83 83 +val_333 333 +val_180 180 +val_284 284 +val_12 12 +val_230 230 +val_181 181 +val_67 67 +val_260 260 +val_404 404 +val_384 384 +val_489 489 +val_353 353 +val_373 373 +val_272 272 +val_138 138 +val_217 217 +val_84 84 +val_348 348 +val_466 466 +val_58 58 +val_8 8 +val_411 411 +val_230 230 +val_208 208 +val_348 348 +val_24 24 +val_463 463 +val_431 431 +val_179 179 +val_172 172 +val_42 42 +val_129 129 +val_158 158 +val_119 119 +val_496 496 +val_0 0 +val_322 322 +val_197 197 +val_468 468 +val_393 393 +val_454 454 +val_100 100 +val_298 298 +val_199 199 +val_191 191 +val_418 418 +val_96 96 +val_26 26 +val_165 165 +val_327 327 +val_230 230 +val_205 205 +val_120 120 +val_131 131 +val_51 51 +val_404 404 +val_43 43 +val_436 436 +val_156 156 +val_469 469 +val_468 468 +val_308 308 +val_95 95 +val_196 196 +val_288 288 +val_481 481 +val_457 457 +val_98 98 +val_282 282 +val_197 197 +val_187 187 +val_318 318 +val_318 318 +val_409 409 +val_470 470 +val_137 137 +val_369 369 +val_316 316 +val_169 169 +val_413 413 +val_85 85 +val_77 77 +val_0 0 +val_490 490 +val_87 87 +val_364 364 +val_179 179 +val_118 118 +val_134 134 +val_395 395 +val_282 282 +val_138 138 +val_238 238 +val_419 419 +val_15 15 +val_118 118 +val_72 72 +val_90 90 +val_307 307 +val_19 19 +val_435 435 +val_10 10 +val_277 277 +val_273 273 +val_306 306 +val_224 224 +val_309 309 +val_389 389 +val_327 327 +val_242 242 +val_369 369 +val_392 392 +val_272 272 +val_331 331 +val_401 401 +val_242 242 +val_452 452 +val_177 177 +val_226 226 +val_5 5 +val_497 497 +val_402 402 +val_396 396 +val_317 317 +val_395 395 +val_58 58 +val_35 35 +val_336 336 +val_95 95 +val_11 11 +val_168 168 +val_34 34 +val_229 229 +val_233 233 +val_143 143 +val_472 472 +val_322 322 +val_498 498 +val_160 160 +val_195 195 +val_42 42 +val_321 321 +val_430 430 +val_119 119 +val_489 489 +val_458 458 +val_78 78 +val_76 76 +val_41 41 +val_223 223 +val_492 492 +val_149 149 +val_449 449 +val_218 218 +val_228 228 +val_138 138 +val_453 453 +val_30 30 +val_209 209 +val_64 64 +val_468 468 +val_76 76 +val_74 74 +val_342 342 
+val_69 69 +val_230 230 +val_33 33 +val_368 368 +val_103 103 +val_296 296 +val_113 113 +val_216 216 +val_367 367 +val_344 344 +val_167 167 +val_274 274 +val_219 219 +val_239 239 +val_485 485 +val_116 116 +val_223 223 +val_256 256 +val_263 263 +val_70 70 +val_487 487 +val_480 480 +val_401 401 +val_288 288 +val_191 191 +val_5 5 +val_244 244 +val_438 438 +val_128 128 +val_467 467 +val_432 432 +val_202 202 +val_316 316 +val_229 229 +val_469 469 +val_463 463 +val_280 280 +val_2 2 +val_35 35 +val_283 283 +val_331 331 +val_235 235 +val_80 80 +val_44 44 +val_193 193 +val_321 321 +val_335 335 +val_104 104 +val_466 466 +val_366 366 +val_175 175 +val_403 403 +val_483 483 +val_53 53 +val_105 105 +val_257 257 +val_406 406 +val_409 409 +val_190 190 +val_406 406 +val_401 401 +val_114 114 +val_258 258 +val_90 90 +val_203 203 +val_262 262 +val_348 348 +val_424 424 +val_12 12 +val_396 396 +val_201 201 +val_217 217 +val_164 164 +val_431 431 +val_454 454 +val_478 478 +val_298 298 +val_125 125 +val_431 431 +val_164 164 +val_424 424 +val_187 187 +val_382 382 +val_5 5 +val_70 70 +val_397 397 +val_480 480 +val_291 291 +val_24 24 +val_351 351 +val_255 255 +val_104 104 +val_70 70 +val_163 163 +val_438 438 +val_119 119 +val_414 414 +val_200 200 +val_491 491 +val_237 237 +val_439 439 +val_360 360 +val_248 248 +val_479 479 +val_305 305 +val_417 417 +val_199 199 +val_444 444 +val_120 120 +val_429 429 +val_169 169 +val_443 443 +val_323 323 +val_325 325 +val_277 277 +val_230 230 +val_478 478 +val_178 178 +val_468 468 +val_310 310 +val_317 317 +val_333 333 +val_493 493 +val_460 460 +val_207 207 +val_249 249 +val_265 265 +val_480 480 +val_83 83 +val_136 136 +val_353 353 +val_172 172 +val_214 214 +val_462 462 +val_233 233 +val_406 406 +val_133 133 +val_175 175 +val_189 189 +val_454 454 +val_375 375 +val_401 401 +val_421 421 +val_407 407 +val_384 384 +val_256 256 +val_26 26 +val_134 134 +val_67 67 +val_384 384 +val_379 379 +val_18 18 +val_462 462 +val_492 492 +val_100 100 +val_298 298 +val_9 9 +val_341 341 +val_498 498 +val_146 146 +val_458 458 +val_362 362 +val_186 186 +val_285 285 +val_348 348 +val_167 167 +val_18 18 +val_273 273 +val_183 183 +val_281 281 +val_344 344 +val_97 97 +val_469 469 +val_315 315 +val_84 84 +val_28 28 +val_37 37 +val_448 448 +val_152 152 +val_348 348 +val_307 307 +val_194 194 +val_414 414 +val_477 477 +val_222 222 +val_126 126 +val_90 90 +val_169 169 +val_403 403 +val_400 400 +val_200 200 +val_97 97 diff --git a/ql/src/test/results/clientpositive/llap/insert_dir_distcp.q.out b/ql/src/test/results/clientpositive/llap/insert_dir_distcp.q.out new file mode 100644 index 0000000..b70fa01 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/insert_dir_distcp.q.out @@ -0,0 +1,14 @@ +PREHOOK: query: -- see TEZ-2931 for using INFO logging + +#### A masked pattern was here #### +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- see TEZ-2931 for using INFO logging + +#### A masked pattern was here #### +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +Found 1 items +#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/join_acid_non_acid.q.out b/ql/src/test/results/clientpositive/llap/join_acid_non_acid.q.out new file mode 100644 index 0000000..4905351 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/join_acid_non_acid.q.out @@ -0,0 +1,58 @@ +PREHOOK: query: CREATE TABLE orc_update_table (k1 INT, f1 STRING, op_code STRING) +CLUSTERED BY (k1) INTO 2 BUCKETS 
+STORED AS ORC TBLPROPERTIES("transactional"="true") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_update_table +POSTHOOK: query: CREATE TABLE orc_update_table (k1 INT, f1 STRING, op_code STRING) +CLUSTERED BY (k1) INTO 2 BUCKETS +STORED AS ORC TBLPROPERTIES("transactional"="true") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_update_table +PREHOOK: query: INSERT INTO TABLE orc_update_table VALUES (1, 'a', 'I') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@orc_update_table +POSTHOOK: query: INSERT INTO TABLE orc_update_table VALUES (1, 'a', 'I') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@orc_update_table +POSTHOOK: Lineage: orc_update_table.f1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: orc_update_table.k1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: orc_update_table.op_code SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: CREATE TABLE orc_table (k1 INT, f1 STRING) +CLUSTERED BY (k1) SORTED BY (k1) INTO 2 BUCKETS +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_table +POSTHOOK: query: CREATE TABLE orc_table (k1 INT, f1 STRING) +CLUSTERED BY (k1) SORTED BY (k1) INTO 2 BUCKETS +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_table +PREHOOK: query: INSERT OVERWRITE TABLE orc_table VALUES (1, 'x') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@orc_table +POSTHOOK: query: INSERT OVERWRITE TABLE orc_table VALUES (1, 'x') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@orc_table +POSTHOOK: Lineage: orc_table.f1 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: orc_table.k1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: SELECT t1.*, t2.* FROM orc_table t1 +JOIN orc_update_table t2 ON t1.k1=t2.k1 ORDER BY t1.k1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_table +PREHOOK: Input: default@orc_update_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.*, t2.* FROM orc_table t1 +JOIN orc_update_table t2 ON t1.k1=t2.k1 ORDER BY t1.k1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_table +POSTHOOK: Input: default@orc_update_table +#### A masked pattern was here #### +1 x 1 a I diff --git a/ql/src/test/results/clientpositive/llap/leftsemijoin_mr.q.out b/ql/src/test/results/clientpositive/llap/leftsemijoin_mr.q.out new file mode 100644 index 0000000..fe63057 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/leftsemijoin_mr.q.out @@ -0,0 +1,98 @@ +PREHOOK: query: CREATE TABLE T1(key INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/leftsemijoin_mr_t1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### 
A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/leftsemijoin_mr_t1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/leftsemijoin_mr_t2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/leftsemijoin_mr_t2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: -- Run this query using TestMinimrCliDriver + +SELECT * FROM T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: -- Run this query using TestMinimrCliDriver + +SELECT * FROM T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 +1 +PREHOOK: query: SELECT * FROM T2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM T2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +PREHOOK: query: SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +1 +1 +PREHOOK: query: SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +1 +1 diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out new file mode 100644 index 0000000..2af0d6e --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out @@ -0,0 +1,256 @@ +PREHOOK: query: -- run this test case in minimr to ensure it works in cluster + +-- list bucketing DML: static partition. multiple skewed columns. 
+-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103/value=val_103: +-- 99 000000_0 +-- 99 000001_0 +-- ds=2008-04-08/hr=11/key=484/value=val_484: +-- 87 000000_0 +-- 87 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key) on ('484','51','103') + stored as DIRECTORIES + STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@list_bucketing_static_part +POSTHOOK: query: -- run this test case in minimr to ensure it works in cluster + +-- list bucketing DML: static partition. multiple skewed columns. +-- ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME: +-- 5263 000000_0 +-- 5263 000001_0 +-- ds=2008-04-08/hr=11/key=103/value=val_103: +-- 99 000000_0 +-- 99 000001_0 +-- ds=2008-04-08/hr=11/key=484/value=val_484: +-- 87 000000_0 +-- 87 000001_0 + +-- create a skewed table +create table list_bucketing_static_part (key String, value String) + partitioned by (ds String, hr String) + skewed by (key) on ('484','51','103') + stored as DIRECTORIES + STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@list_bucketing_static_part +PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +PREHOOK: type: QUERY +POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files. +explain extended +insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2008-04-08/hr=11/ + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Execution mode: 
llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 11 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.list_bucketing_static_part + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct list_bucketing_static_part { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.list_bucketing_static_part + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') +select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- check DML result +show partitions 
list_bucketing_static_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: -- check DML result +show partitions list_bucketing_static_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@list_bucketing_static_part +ds=2008-04-08/hr=11 +PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@list_bucketing_static_part +POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@list_bucketing_static_part +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +ds string +hr string + +# Detailed Partition Information +Partition Value: [2008-04-08, 11] +Database: default +Table: list_bucketing_static_part +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 4 + numRows 500 + rawDataSize 4812 + totalSize 5520 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Stored As SubDirectories: Yes +Skewed Columns: [key] +Skewed Values: [[103], [484], [51]] +#### A masked pattern was here #### +Skewed Value to Truncated Path: {[103]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=103, [484]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=484, [51]=/list_bucketing_static_part/ds=2008-04-08/hr=11/key=51} +Storage Desc Params: + serialization.format 1 diff --git a/ql/src/test/results/clientpositive/llap/load_fs2.q.out b/ql/src/test/results/clientpositive/llap/load_fs2.q.out new file mode 100644 index 0000000..1846542 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/load_fs2.q.out @@ -0,0 +1,121 @@ +PREHOOK: query: -- HIVE-3300 [jira] LOAD DATA INPATH fails if a hdfs file with same name is added to table +-- 'loader' table is used only for uploading kv1.txt to HDFS (!hdfs -put is not working on minMRDriver) + +create table result (key string, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@result +POSTHOOK: query: -- HIVE-3300 [jira] LOAD DATA INPATH fails if a hdfs file with same name is added to table +-- 'loader' table is used only for uploading kv1.txt to HDFS (!hdfs -put is not working on minMRDriver) + +create table result (key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@result +PREHOOK: query: create table loader (key string, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@loader +POSTHOOK: query: create table loader (key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@loader +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table loader +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@loader +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table loader +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@loader +PREHOOK: query: load data inpath '/build/ql/test/data/warehouse/loader/kv1.txt' 
into table result +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@result +POSTHOOK: query: load data inpath '/build/ql/test/data/warehouse/loader/kv1.txt' into table result +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@result +PREHOOK: query: show table extended like result +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like result +POSTHOOK: type: SHOW_TABLESTATUS +tableName:result +#### A masked pattern was here #### +inputformat:org.apache.hadoop.mapred.TextInputFormat +outputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +columns:struct columns { string key, string value} +partitioned:false +partitionColumns: +totalNumberFiles:1 +totalFileSize:5812 +maxFileSize:5812 +minFileSize:5812 +#### A masked pattern was here #### + +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table loader +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@loader +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table loader +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@loader +PREHOOK: query: load data inpath '/build/ql/test/data/warehouse/loader/kv1.txt' into table result +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@result +POSTHOOK: query: load data inpath '/build/ql/test/data/warehouse/loader/kv1.txt' into table result +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@result +PREHOOK: query: show table extended like result +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like result +POSTHOOK: type: SHOW_TABLESTATUS +tableName:result +#### A masked pattern was here #### +inputformat:org.apache.hadoop.mapred.TextInputFormat +outputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +columns:struct columns { string key, string value} +partitioned:false +partitionColumns: +totalNumberFiles:2 +totalFileSize:11624 +maxFileSize:5812 +minFileSize:5812 +#### A masked pattern was here #### + +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table loader +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@loader +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table loader +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@loader +PREHOOK: query: load data inpath '/build/ql/test/data/warehouse/loader/kv1.txt' into table result +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@result +POSTHOOK: query: load data inpath '/build/ql/test/data/warehouse/loader/kv1.txt' into table result +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@result +PREHOOK: query: show table extended like result +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like result +POSTHOOK: type: SHOW_TABLESTATUS +tableName:result +#### A masked pattern was here #### +inputformat:org.apache.hadoop.mapred.TextInputFormat +outputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +columns:struct columns { string key, string value} +partitioned:false +partitionColumns: +totalNumberFiles:3 +totalFileSize:17436 +maxFileSize:5812 +minFileSize:5812 +#### A masked pattern was here #### + diff --git a/ql/src/test/results/clientpositive/llap/load_hdfs_file_with_space_in_the_name.q.out 
b/ql/src/test/results/clientpositive/llap/load_hdfs_file_with_space_in_the_name.q.out new file mode 100644 index 0000000..d934722 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/load_hdfs_file_with_space_in_the_name.q.out @@ -0,0 +1,25 @@ +PREHOOK: query: CREATE TABLE load_file_with_space_in_the_name(name STRING, age INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@load_file_with_space_in_the_name +POSTHOOK: query: CREATE TABLE load_file_with_space_in_the_name(name STRING, age INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@load_file_with_space_in_the_name +#### A masked pattern was here #### +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load_file_with_space_in_the_name +#### A masked pattern was here #### +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load_file_with_space_in_the_name +#### A masked pattern was here #### +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@load_file_with_space_in_the_name +#### A masked pattern was here #### +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@load_file_with_space_in_the_name +#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/non_native_window_udf.q.out b/ql/src/test/results/clientpositive/llap/non_native_window_udf.q.out new file mode 100644 index 0000000..605e5b2 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/non_native_window_udf.q.out @@ -0,0 +1,52 @@ +PREHOOK: query: create temporary function mylastval as 'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLastValue' +PREHOOK: type: CREATEFUNCTION +PREHOOK: Output: mylastval +POSTHOOK: query: create temporary function mylastval as 'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFLastValue' +POSTHOOK: type: CREATEFUNCTION +POSTHOOK: Output: mylastval +PREHOOK: query: select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l, +mylastval(p_size, false) over w1 as m +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l, +mylastval(p_size, false) over w1 as m +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 2 2 2 34 34 +Manufacturer#1 almond antique burnished rose metallic 2 2 2 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 34 34 2 28 28 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 6 2 42 42 +Manufacturer#1 almond aquamarine burnished black steel 28 28 34 42 42 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 6 42 42 +Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 2 2 +Manufacturer#2 almond antique violet turquoise frosted 40 40 14 25 25 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 14 18 18 +Manufacturer#2 almond 
aquamarine rose maroon antique 25 25 40 18 18 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 2 18 18 +Manufacturer#3 almond antique chartreuse khaki white 17 17 17 19 19 +Manufacturer#3 almond antique forest lavender goldenrod 14 14 17 1 1 +Manufacturer#3 almond antique metallic orange dim 19 19 17 45 45 +Manufacturer#3 almond antique misty red olive 1 1 14 45 45 +Manufacturer#3 almond antique olive coral navajo 45 45 19 45 45 +Manufacturer#4 almond antique gainsboro frosted violet 10 10 10 27 27 +Manufacturer#4 almond antique violet mint lemon 39 39 10 7 7 +Manufacturer#4 almond aquamarine floral ivory bisque 27 27 10 12 12 +Manufacturer#4 almond aquamarine yellow dodger mint 7 7 39 12 12 +Manufacturer#4 almond azure aquamarine papaya violet 12 12 27 12 12 +Manufacturer#5 almond antique blue firebrick mint 31 31 31 2 2 +Manufacturer#5 almond antique medium spring khaki 6 6 31 46 46 +Manufacturer#5 almond antique sky peru orange 2 2 31 23 23 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 6 23 23 +Manufacturer#5 almond azure blanched chiffon midnight 23 23 2 23 23 diff --git a/ql/src/test/results/clientpositive/llap/quotedid_smb.q.out b/ql/src/test/results/clientpositive/llap/quotedid_smb.q.out new file mode 100644 index 0000000..8e850f5 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/quotedid_smb.q.out @@ -0,0 +1,81 @@ +PREHOOK: query: create table src_b(`x+1` string, `!@#$%^&*()_q` string) +clustered by (`!@#$%^&*()_q`) sorted by (`!@#$%^&*()_q`) into 2 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_b +POSTHOOK: query: create table src_b(`x+1` string, `!@#$%^&*()_q` string) +clustered by (`!@#$%^&*()_q`) sorted by (`!@#$%^&*()_q`) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_b +PREHOOK: query: insert overwrite table src_b +select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_b +POSTHOOK: query: insert overwrite table src_b +select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_b +POSTHOOK: Lineage: src_b.!@#$%^&*()_q SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_b.x+1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: create table src_b2(`x+1` string, `!@#$%^&*()_q` string) +clustered by (`!@#$%^&*()_q`) sorted by (`!@#$%^&*()_q`) into 2 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_b2 +POSTHOOK: query: create table src_b2(`x+1` string, `!@#$%^&*()_q` string) +clustered by (`!@#$%^&*()_q`) sorted by (`!@#$%^&*()_q`) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_b2 +PREHOOK: query: insert overwrite table src_b2 +select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_b2 +POSTHOOK: query: insert overwrite table src_b2 +select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_b2 +POSTHOOK: Lineage: src_b2.!@#$%^&*()_q SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_b2.x+1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q` +from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q` +where a.`x+1` 
< '11' +PREHOOK: type: QUERY +PREHOOK: Input: default@src_b +PREHOOK: Input: default@src_b2 +#### A masked pattern was here #### +POSTHOOK: query: select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q` +from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q` +where a.`x+1` < '11' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_b +POSTHOOK: Input: default@src_b2 +#### A masked pattern was here #### +10 val_10 10 val_10 +100 val_100 100 val_100 +100 val_100 100 val_100 +100 val_100 100 val_100 +100 val_100 100 val_100 +104 val_104 104 val_104 +104 val_104 104 val_104 +104 val_104 104 val_104 +104 val_104 104 val_104 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +103 val_103 103 val_103 +103 val_103 103 val_103 +103 val_103 103 val_103 +103 val_103 103 val_103 +105 val_105 105 val_105 diff --git a/ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out b/ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out new file mode 100644 index 0000000..219819a --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out @@ -0,0 +1,387 @@ +PREHOOK: query: CREATE TABLE bucket5_1(key string, value string) CLUSTERED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket5_1 +POSTHOOK: query: CREATE TABLE bucket5_1(key string, value string) CLUSTERED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket5_1 +PREHOOK: query: explain extended +insert overwrite table bucket5_1 +select * from src cluster by key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucket5_1 +select * from src cluster by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string 
key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 2 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket5_1 + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct bucket5_1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket5_1 + TotalFiles: 2 + GatherStats: true + MultiFileSpray: true + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket5_1 + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct bucket5_1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket5_1 + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table bucket5_1 +select * from src cluster by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@bucket5_1 +POSTHOOK: query: insert overwrite table bucket5_1 +select * from src cluster by key +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bucket5_1 +POSTHOOK: Lineage: bucket5_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: bucket5_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select sum(hash(key)),sum(hash(value)) from bucket5_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket5_1 +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(key)),sum(hash(value)) from bucket5_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket5_1 +#### A masked pattern was here #### +21025334 36210398070 +PREHOOK: query: select sum(hash(key)),sum(hash(value)) from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(key)),sum(hash(value)) from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +21025334 36210398070 +PREHOOK: query: create table complex_tbl_1(aid string, bid string, t int, ctime string, etime bigint, l string, et string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@complex_tbl_1 +POSTHOOK: query: create table complex_tbl_1(aid string, bid string, t int, ctime string, etime bigint, l string, et string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@complex_tbl_1 +PREHOOK: query: create table complex_tbl_2(aet string, aes string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@complex_tbl_2 +POSTHOOK: query: create table complex_tbl_2(aet string, aes string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@complex_tbl_2 +PREHOOK: query: explain extended +insert overwrite table complex_tbl_1 partition (ds='2010-03-29') +select s2.* from +( + select TRANSFORM (aid,bid,t,ctime,etime,l,et) + USING 'cat' + AS (aid string, bid string, t int, ctime string, etime bigint, l string, et string) + from + ( + select transform(aet,aes) + using 'cat' + as (aid string, bid string, t int, ctime string, etime bigint, l string, et string) + from complex_tbl_2 where ds ='2010-03-29' cluster by bid +)s +)s2 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table complex_tbl_1 partition (ds='2010-03-29') +select s2.* from +( + select TRANSFORM (aid,bid,t,ctime,etime,l,et) + USING 'cat' + AS (aid string, bid string, t int, ctime string, etime bigint, l string, et string) + from + ( + select transform(aet,aes) + using 'cat' + as (aid string, bid string, t int, ctime string, etime bigint, l string, et string) + from complex_tbl_2 where ds ='2010-03-29' cluster by bid +)s +)s2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: complex_tbl_2 + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: PARTIAL + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (ds = '2010-03-29') (type: boolean) + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: 
PARTIAL + Select Operator + expressions: aet (type: string), aes (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Transform Operator + command: cat + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string,string,int,string,bigint,string,string + field.delim 9 + serialization.format 9 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: bigint), _col5 (type: string), _col6 (type: string) + auto parallelism: true + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: string), VALUE._col4 (type: bigint), VALUE._col5 (type: string), VALUE._col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Transform Operator + command: cat + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string,string,int,string,bigint,string,string + field.delim 9 + serialization.format 9 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2010-03-29/ + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns aid,bid,t,ctime,etime,l,et + columns.comments + columns.types string:string:int:string:bigint:string:string +#### A masked pattern was here #### + name default.complex_tbl_1 + partition_columns ds + partition_columns.types string + serialization.ddl struct complex_tbl_1 { string aid, string bid, i32 t, string ctime, i64 etime, string l, string et} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.complex_tbl_1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2010-03-29 + replace: true +#### A masked pattern was here #### + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns aid,bid,t,ctime,etime,l,et + columns.comments + columns.types string:string:int:string:bigint:string:string +#### A masked pattern was here #### + name default.complex_tbl_1 + partition_columns ds + partition_columns.types string + serialization.ddl struct complex_tbl_1 { string aid, string bid, i32 t, string ctime, i64 etime, string l, string et} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.complex_tbl_1 + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + diff --git a/ql/src/test/results/clientpositive/llap/remote_script.q.out b/ql/src/test/results/clientpositive/llap/remote_script.q.out new file mode 100644 index 0000000..87531f0 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/remote_script.q.out @@ -0,0 +1,49 @@ +PREHOOK: query: create table tmp_tmp(key string, value string) stored as rcfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp_tmp +POSTHOOK: query: create table tmp_tmp(key string, value string) stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp_tmp +PREHOOK: query: insert overwrite table tmp_tmp +SELECT TRANSFORM(key, value) USING +'python newline.py' AS key, value FROM src limit 6 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tmp_tmp +POSTHOOK: query: insert overwrite table tmp_tmp +SELECT TRANSFORM(key, value) USING +'python newline.py' AS key, value FROM src limit 6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tmp_tmp +POSTHOOK: Lineage: tmp_tmp.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tmp_tmp.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from tmp_tmp ORDER BY key ASC, value ASC +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp_tmp +#### A masked pattern was here #### +POSTHOOK: query: select * from tmp_tmp ORDER BY key ASC, value ASC +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp_tmp +#### A masked pattern was here #### +1 2 NULL +1 2 NULL +1 +2 NULL +1 +2 NULL +1 +2 NULL +1 +2 NULL +#### A masked pattern was here #### +PREHOOK: query: drop table tmp_tmp +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tmp_tmp +PREHOOK: Output: default@tmp_tmp +POSTHOOK: query: drop table tmp_tmp +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tmp_tmp +POSTHOOK: Output: default@tmp_tmp diff --git a/ql/src/test/results/clientpositive/llap/schemeAuthority.q.out b/ql/src/test/results/clientpositive/llap/schemeAuthority.q.out new file mode 100644 index 0000000..9a6019c --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/schemeAuthority.q.out @@ -0,0 +1,84 @@ +PREHOOK: query: create external table dynPart (key string) partitioned by (value string) row format delimited fields terminated by '\\t' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dynPart +POSTHOOK: query: create external table dynPart (key string) partitioned by (value string) row format 
delimited fields terminated by '\\t' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dynPart +#### A masked pattern was here #### +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +PREHOOK: Output: default@dynpart +#### A masked pattern was here #### +POSTHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +POSTHOOK: Output: default@dynpart +POSTHOOK: Output: default@dynpart@value=0 +#### A masked pattern was here #### +PREHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +PREHOOK: Output: default@dynpart +#### A masked pattern was here #### +POSTHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +POSTHOOK: Output: default@dynpart +POSTHOOK: Output: default@dynpart@value=1 +PREHOOK: query: select count(*) from dynPart +PREHOOK: type: QUERY +PREHOOK: Input: default@dynpart +PREHOOK: Input: default@dynpart@value=0 +PREHOOK: Input: default@dynpart@value=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from dynPart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dynpart +POSTHOOK: Input: default@dynpart@value=0 +POSTHOOK: Input: default@dynpart@value=1 +#### A masked pattern was here #### +2 +PREHOOK: query: select key from dynPart +PREHOOK: type: QUERY +PREHOOK: Input: default@dynpart +PREHOOK: Input: default@dynpart@value=0 +PREHOOK: Input: default@dynpart@value=1 +#### A masked pattern was here #### +POSTHOOK: query: select key from dynPart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dynpart +POSTHOOK: Input: default@dynpart@value=0 +POSTHOOK: Input: default@dynpart@value=1 +#### A masked pattern was here #### +10 +20 +PREHOOK: query: select key from src where (key = 10) order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key from src where (key = 10) order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +10 +PREHOOK: query: select key from src where (key = 20) order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key from src where (key = 20) order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +20 +#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/schemeAuthority2.q.out b/ql/src/test/results/clientpositive/llap/schemeAuthority2.q.out new file mode 100644 index 0000000..60913f2 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/schemeAuthority2.q.out @@ -0,0 +1,53 @@ +PREHOOK: query: create external table dynPart (key string) partitioned by (value string, value2 string) row format delimited fields terminated by '\\t' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dynPart +POSTHOOK: query: create external table dynPart (key string) partitioned by (value string, value2 string) row format delimited fields terminated by '\\t' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@dynPart +#### A masked pattern was here #### +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +PREHOOK: Output: default@dynpart +#### A masked pattern was here #### +POSTHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +POSTHOOK: Output: default@dynpart +POSTHOOK: Output: default@dynpart@value=0/value2=clusterA +#### A masked pattern was here #### +PREHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +PREHOOK: Output: default@dynpart +#### A masked pattern was here #### +POSTHOOK: type: ALTERTABLE_ADDPARTS +#### A masked pattern was here #### +POSTHOOK: Output: default@dynpart +POSTHOOK: Output: default@dynpart@value=0/value2=clusterB +PREHOOK: query: select value2, key from dynPart where value='0' +PREHOOK: type: QUERY +PREHOOK: Input: default@dynpart +PREHOOK: Input: default@dynpart@value=0/value2=clusterA +PREHOOK: Input: default@dynpart@value=0/value2=clusterB +#### A masked pattern was here #### +POSTHOOK: query: select value2, key from dynPart where value='0' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dynpart +POSTHOOK: Input: default@dynpart@value=0/value2=clusterA +POSTHOOK: Input: default@dynpart@value=0/value2=clusterB +#### A masked pattern was here #### +clusterA 10 +clusterB 20 +#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/table_nonprintable.q.out b/ql/src/test/results/clientpositive/llap/table_nonprintable.q.out new file mode 100644 index 0000000..d7c93f2 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/table_nonprintable.q.out @@ -0,0 +1,72 @@ +Found 1 items +#### A masked pattern was here #### +Found 1 items +#### A masked pattern was here #### +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: create external table table_external (c1 int, c2 int) +partitioned by (day string) +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@table_external +POSTHOOK: query: create external table table_external (c1 int, c2 int) +partitioned by (day string) +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_external +PREHOOK: query: msck repair table table_external +PREHOOK: type: MSCK +POSTHOOK: query: msck repair table table_external +POSTHOOK: type: MSCK +Partitions not in metastore: table_external:day=¢Bar +Repair: Cannot add partition table_external:day=Foo due to invalid characters in the name +Repair: Added partition to metastore table_external:day=¢Bar +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: show partitions table_external +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@table_external +POSTHOOK: query: show partitions table_external +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@table_external +day=¢Bar +PREHOOK: query: select * from table_external +PREHOOK: type: QUERY +PREHOOK: Input: default@table_external +PREHOOK: Input: default@table_external@day=¢Bar +#### A masked pattern was here #### +POSTHOOK: query: 
select * from table_external +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table_external +POSTHOOK: Input: default@table_external@day=¢Bar +#### A masked pattern was here #### +NULL 35 ¢Bar +48 NULL ¢Bar +100 100 ¢Bar +PREHOOK: query: alter table table_external drop partition (day='¢Bar') +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: default@table_external +PREHOOK: Output: default@table_external@day=¢Bar +POSTHOOK: query: alter table table_external drop partition (day='¢Bar') +POSTHOOK: type: ALTERTABLE_DROPPARTS +POSTHOOK: Input: default@table_external +POSTHOOK: Output: default@table_external@day=¢Bar +PREHOOK: query: show partitions table_external +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@table_external +POSTHOOK: query: show partitions table_external +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@table_external +PREHOOK: query: drop table table_external +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table_external +PREHOOK: Output: default@table_external +POSTHOOK: query: drop table table_external +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table_external +POSTHOOK: Output: default@table_external +#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/temp_table_external.q.out b/ql/src/test/results/clientpositive/llap/temp_table_external.q.out new file mode 100644 index 0000000..001cd98 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/temp_table_external.q.out @@ -0,0 +1,34 @@ +Found 1 items +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@temp_table_external +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@temp_table_external +PREHOOK: query: select * from temp_table_external +PREHOOK: type: QUERY +PREHOOK: Input: default@temp_table_external +#### A masked pattern was here #### +POSTHOOK: query: select * from temp_table_external +POSTHOOK: type: QUERY +POSTHOOK: Input: default@temp_table_external +#### A masked pattern was here #### +NULL 35 +48 NULL +100 100 +PREHOOK: query: -- Even after we drop the table, the data directory should still be there +drop table temp_table_external +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@temp_table_external +PREHOOK: Output: default@temp_table_external +POSTHOOK: query: -- Even after we drop the table, the data directory should still be there +drop table temp_table_external +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@temp_table_external +POSTHOOK: Output: default@temp_table_external +Found 1 items +#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/uber_reduce.q.out b/ql/src/test/results/clientpositive/llap/uber_reduce.q.out new file mode 100644 index 0000000..2a29131 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/uber_reduce.q.out @@ -0,0 +1,31 @@ +PREHOOK: query: -- Uberized mode is a YARN option, ignore this test for non-YARN Hadoop versions +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) + +CREATE TABLE T1(key STRING, val STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: -- Uberized mode is a YARN option, ignore this test for non-YARN Hadoop versions +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) + +CREATE TABLE T1(key STRING, val STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default 
+POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: SELECT count(*) FROM T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) FROM T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +6
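
Notes on the tests this patch migrates from minimr to minillap. The list-bucketing DML exercised by list_bucket_dml_10.q.out reduces to the sketch below; the DDL and INSERT mirror the queries echoed in the diff, while the two read-side settings are assumptions about the q-file's setup (list-bucketed subdirectories need recursive input listing to be queried) and are not part of this patch.

-- DDL as echoed in list_bucket_dml_10.q.out; the SET lines are assumed setup.
SET hive.mapred.supports.subdirectories=true;
SET mapred.input.dir.recursive=true;

CREATE TABLE list_bucketing_static_part (key STRING, value STRING)
  PARTITIONED BY (ds STRING, hr STRING)
  SKEWED BY (key) ON ('484','51','103')
  STORED AS DIRECTORIES      -- each listed skew value gets its own subdirectory
  STORED AS RCFILE;

INSERT OVERWRITE TABLE list_bucketing_static_part PARTITION (ds = '2008-04-08', hr = '11')
SELECT key, value FROM src;

-- Rows for the skewed keys land under .../key=484 and friends; everything else
-- goes to the HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME directory, matching the
-- file-count comment block at the top of the q-file.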
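
reduce_deduplicate.q.out shows a single Tez shuffle feeding the bucketed file sink (Reducer 2 writes NumFilesPerFileSink: 2 directly, with no extra stage). A plausible minimal reproduction follows; naming hive.optimize.reducededuplication as the rewrite under test is an inference from the test name, not something the diff states.

SET hive.optimize.reducededuplication=true;   -- assumed: the rewrite under test

CREATE TABLE bucket5_1 (key STRING, value STRING) CLUSTERED BY (key) INTO 2 BUCKETS;

-- CLUSTER BY on the bucketing column lets the bucketing shuffle be merged with
-- the query's own shuffle, so one reducer stage writes both bucket files.
INSERT OVERWRITE TABLE bucket5_1
SELECT * FROM src CLUSTER BY key;

-- Order-independent checksums, as in the golden output, to confirm the
-- bucketed copy matches the source (both should return 21025334 36210398070):
SELECT sum(hash(key)), sum(hash(value)) FROM bucket5_1;
SELECT sum(hash(key)), sum(hash(value)) FROM src;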
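
join_acid_non_acid.q.out joins a transactional ORC table against a plain bucketed ORC table. The DDL below is taken from the queries echoed in the diff; the two session settings are assumptions about the harness configuration, since ACID tables are only usable with concurrency support and the DbTxnManager enabled.

SET hive.support.concurrency=true;                                    -- assumed setup
SET hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;  -- assumed setup

CREATE TABLE orc_update_table (k1 INT, f1 STRING, op_code STRING)
  CLUSTERED BY (k1) INTO 2 BUCKETS
  STORED AS ORC TBLPROPERTIES ('transactional'='true');
INSERT INTO TABLE orc_update_table VALUES (1, 'a', 'I');

CREATE TABLE orc_table (k1 INT, f1 STRING)
  CLUSTERED BY (k1) SORTED BY (k1) INTO 2 BUCKETS
  STORED AS ORC;
INSERT OVERWRITE TABLE orc_table VALUES (1, 'x');

SELECT t1.*, t2.* FROM orc_table t1
JOIN orc_update_table t2 ON t1.k1 = t2.k1 ORDER BY t1.k1;   -- expected: 1 x 1 a I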
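
quotedid_smb.q.out drives a sort-merge-bucket join through columns whose names are only legal as quoted identifiers. A sketch, assuming the usual SMB session flags (the q-file's own SET lines are not visible in this diff):

SET hive.support.quoted.identifiers=column;          -- allows `!@#$%^&*()_q` as a name
SET hive.auto.convert.sortmerge.join=true;           -- assumed SMB setup
SET hive.optimize.bucketmapjoin=true;                -- assumed SMB setup
SET hive.optimize.bucketmapjoin.sortedmerge=true;    -- assumed SMB setup

CREATE TABLE src_b (`x+1` STRING, `!@#$%^&*()_q` STRING)
  CLUSTERED BY (`!@#$%^&*()_q`) SORTED BY (`!@#$%^&*()_q`) INTO 2 BUCKETS;
INSERT OVERWRITE TABLE src_b SELECT * FROM src;

CREATE TABLE src_b2 LIKE src_b;                      -- copies the bucket/sort spec
INSERT OVERWRITE TABLE src_b2 SELECT * FROM src;

SELECT a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q`
FROM src_b a JOIN src_b2 b ON a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q`
WHERE a.`x+1` < '11';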
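
table_nonprintable.q.out and temp_table_external.q.out both hinge on external-table semantics: MSCK registers only partition directories with valid names (day=¢Bar is added while the name with an invalid character is rejected), and DROP TABLE on an external table removes metadata only. A sketch with an illustrative location, since the real paths are masked in the golden output:

CREATE EXTERNAL TABLE table_external (c1 INT, c2 INT)
  PARTITIONED BY (day STRING)
  LOCATION '/tmp/table_external';   -- hypothetical path; masked in the output

-- After partition directories appear under the location out-of-band:
MSCK REPAIR TABLE table_external;   -- registers valid names, reports invalid ones
SHOW PARTITIONS table_external;

-- Metadata only: the files under the external location stay on HDFS, which is
-- what the trailing 'Found 1 items' check in temp_table_external.q.out verifies.
DROP TABLE table_external;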