diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 22ac682..130ff6a 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -405,272 +405,478 @@ minimr.query.negative.files=cluster_tasklog_retrieval.q,\ minimr_broken_pipe.q,\ udf_local_resource.q -spark.query.files=add_part_multiple.q \ - alter_merge_orc.q \ - alter_merge_stats_orc.q \ - auto_smb_mapjoin_14.q \ - avro_compression_enabled_native.q \ - avro_decimal_native.q \ - bucket2.q \ - bucket3.q \ - bucket4.q \ - column_access_stats.q \ - count.q \ - create_merge_compressed.q \ - ctas.q \ - custom_input_output_format.q \ - date_udf.q \ - disable_merge_for_bucketing.q \ - enforce_order.q \ - escape_clusterby1.q \ - escape_distributeby1.q \ - escape_orderby1.q \ - escape_sortby1.q \ - groupby1.q \ - groupby10.q \ - groupby11.q \ - groupby2.q \ - groupby3.q \ - groupby3_map.q \ - groupby3_map_multi_distinct.q \ - groupby3_map_skew.q \ - groupby3_noskew.q \ - groupby3_noskew_multi_distinct.q \ - groupby4.q \ - groupby7.q \ - groupby7_map.q \ - groupby7_map_multi_single_reducer.q \ - groupby7_map_skew.q \ - groupby7_noskew.q \ - groupby7_noskew_multi_single_reducer.q \ - groupby8.q \ - groupby8_map.q \ - groupby8_map_skew.q \ - groupby8_noskew.q \ - groupby9.q \ - groupby_complex_types.q \ - groupby_complex_types_multi_single_reducer.q \ - groupby_cube1.q \ - groupby_multi_insert_common_distinct.q \ - groupby_multi_single_reducer.q \ - groupby_multi_single_reducer2.q \ - groupby_multi_single_reducer3.q \ - groupby_position.q \ - groupby_ppr.q \ - groupby_rollup1.q \ - groupby_sort_1_23.q \ - groupby_sort_skew_1_23.q \ - having.q \ - innerjoin.q \ - input12.q \ - input13.q \ - input14.q \ - input17.q \ - input18.q \ - input1_limit.q \ - input_part2.q \ - insert1.q \ - insert_into1.q \ - insert_into2.q \ - insert_into3.q \ - join0.q \ - join1.q \ - join2.q \ - join3.q \ - join4.q \ - join5.q \ - join6.q \ - join7.q \ - join8.q \ - join9.q \ - join10.q \ - join11.q \ - join12.q \ - join13.q \ - join14.q \ - join15.q \ - join16.q \ - join17.q \ - join18.q \ - join19.q \ - join20.q \ - join21.q \ - join22.q \ - join23.q \ - join24.q \ - join25.q \ - join26.q \ - join27.q \ - join_1to1.q \ - join_casesensitive.q \ - join_nullsafe.q \ - limit_pushdown.q \ - load_dyn_part1.q \ - load_dyn_part2.q \ - load_dyn_part3.q \ - mapreduce1.q \ - mapreduce2.q \ - merge1.q \ - merge2.q \ - metadata_only_queries.q \ - metadata_only_queries_with_filters.q \ - multi_insert.q \ - multi_insert_gby.q \ - multi_insert_gby2.q \ - multi_insert_gby3.q \ - multi_insert_lateral_view.q \ - multi_insert_mixed.q \ - multi_insert_move_tasks_share_dependencies.q \ - multigroupby_singlemr.q \ - optimize_nullscan.q \ - order.q \ - order2.q \ - parallel.q \ +spark.query.files=add_part_multiple.q, \ + alter_merge_orc.q, \ + alter_merge_stats_orc.q, \ + annotate_stats_join.q, \ + auto_join0.q, \ + auto_join1.q, \ + auto_join10.q, \ + auto_join11.q, \ + auto_join12.q, \ + auto_join13.q, \ + auto_join14.q, \ + auto_join15.q, \ + auto_join16.q, \ + auto_join17.q, \ + auto_join18.q, \ + auto_join18_multi_distinct.q, \ + auto_join19.q, \ + auto_join2.q, \ + auto_join20.q, \ + auto_join21.q, \ + auto_join22.q, \ + auto_join23.q, \ + auto_join24.q, \ + auto_join25.q, \ + auto_join27.q, \ + auto_join28.q, \ + auto_join29.q, \ + auto_join3.q, \ + auto_join30.q, \ + auto_join31.q, \ + auto_join32.q, \ + auto_join4.q, \ + auto_join5.q, \ + 
auto_join6.q, \ + auto_join7.q, \ + auto_join8.q, \ + auto_join9.q, \ + auto_join_filters.q, \ + auto_join_nulls.q, \ + auto_join_reordering_values.q, \ + auto_smb_mapjoin_14.q, \ + auto_sortmerge_join_1.q, \ + auto_sortmerge_join_10.q, \ + auto_sortmerge_join_11.q, \ + auto_sortmerge_join_12.q, \ + auto_sortmerge_join_13.q, \ + auto_sortmerge_join_14.q, \ + auto_sortmerge_join_15.q, \ + auto_sortmerge_join_2.q, \ + auto_sortmerge_join_3.q, \ + auto_sortmerge_join_4.q, \ + auto_sortmerge_join_5.q, \ + auto_sortmerge_join_6.q, \ + auto_sortmerge_join_7.q, \ + auto_sortmerge_join_8.q, \ + auto_sortmerge_join_9.q, \ + avro_compression_enabled_native.q, \ + avro_decimal_native.q, \ + avro_joins.q, \ + avro_joins_native.q, \ + bucket2.q, \ + bucket3.q, \ + bucket4.q, \ + bucket_map_join_1.q, \ + bucket_map_join_2.q, \ + bucket_map_join_tez1.q, \ + bucket_map_join_tez2.q, \ + bucketmapjoin1.q, \ + bucketmapjoin10.q, \ + bucketmapjoin11.q, \ + bucketmapjoin12.q, \ + bucketmapjoin13.q, \ + bucketmapjoin2.q, \ + bucketmapjoin3.q, \ + bucketmapjoin4.q, \ + bucketmapjoin5.q, \ + bucketmapjoin6.q, \ + bucketmapjoin7.q, \ + bucketmapjoin8.q, \ + bucketmapjoin9.q, \ + bucketmapjoin_negative.q, \ + bucketmapjoin_negative2.q, \ + bucketmapjoin_negative3.q, \ + column_access_stats.q, \ + count.q, \ + create_merge_compressed.q, \ + cross_join.q, \ + ctas.q, \ + custom_input_output_format.q, \ + date_udf.q, \ + decimal_join.q, \ + disable_merge_for_bucketing.q, \ + enforce_order.q, \ + escape_clusterby1.q, \ + escape_distributeby1.q, \ + escape_orderby1.q, \ + escape_sortby1.q, \ + groupby1.q, \ + groupby10.q, \ + groupby11.q, \ + groupby2.q, \ + groupby3.q, \ + groupby3_map.q, \ + groupby3_map_multi_distinct.q, \ + groupby3_map_skew.q, \ + groupby3_noskew.q, \ + groupby3_noskew_multi_distinct.q, \ + groupby4.q, \ + groupby7.q, \ + groupby7_map.q, \ + groupby7_map_multi_single_reducer.q, \ + groupby7_map_skew.q, \ + groupby7_noskew.q, \ + groupby7_noskew_multi_single_reducer.q, \ + groupby8.q, \ + groupby8_map.q, \ + groupby8_map_skew.q, \ + groupby8_noskew.q, \ + groupby9.q, \ + groupby_complex_types.q, \ + groupby_complex_types_multi_single_reducer.q, \ + groupby_cube1.q, \ + groupby_multi_insert_common_distinct.q, \ + groupby_multi_single_reducer.q, \ + groupby_multi_single_reducer2.q, \ + groupby_multi_single_reducer3.q, \ + groupby_position.q, \ + groupby_ppr.q, \ + groupby_rollup1.q, \ + groupby_sort_1_23.q, \ + groupby_sort_skew_1_23.q, \ + having.q, \ + index_auto_self_join.q, \ + infer_bucket_sort_convert_join.q, \ + innerjoin.q, \ + input12.q, \ + input13.q, \ + input14.q, \ + input17.q, \ + input18.q, \ + input1_limit.q, \ + input_part2.q, \ + insert1.q, \ + insert_into1.q, \ + insert_into2.q, \ + insert_into3.q, \ + join0.q, \ + join1.q, \ + join10.q, \ + join11.q, \ + join12.q, \ + join13.q, \ + join14.q, \ + join15.q, \ + join16.q, \ + join17.q, \ + join18.q, \ + join18_multi_distinct.q, \ + join19.q, \ + join2.q, \ + join20.q, \ + join21.q, \ + join22.q, \ + join23.q, \ + join24.q, \ + join25.q, \ + join26.q, \ + join27.q, \ + join28.q, \ + join29.q, \ + join3.q, \ + join30.q, \ + join31.q, \ + join32.q, \ + join32_lessSize.q, \ + join33.q, \ + join34.q, \ + join35.q, \ + join36.q, \ + join37.q, \ + join38.q, \ + join39.q, \ + join4.q, \ + join41.q, \ + join5.q, \ + join6.q, \ + join7.q, \ + join8.q, \ + join9.q, \ + join_1to1.q, \ + join_alt_syntax.q, \ + join_array.q, \ + join_casesensitive.q, \ + join_cond_pushdown_1.q, \ + join_cond_pushdown_2.q, \ + join_cond_pushdown_3.q, \ + 
join_cond_pushdown_4.q, \ + join_cond_pushdown_unqual1.q, \ + join_cond_pushdown_unqual2.q, \ + join_cond_pushdown_unqual3.q, \ + join_cond_pushdown_unqual4.q, \ + join_empty.q, \ + join_filters.q, \ + join_filters_overlap.q, \ + join_hive_626.q, \ + join_literals.q, \ + join_map_ppr.q, \ + join_merge_multi_expressions.q, \ + join_merging.q, \ + join_nulls.q, \ + join_nullsafe.q, \ + join_rc.q, \ + join_reorder.q, \ + join_reorder2.q, \ + join_reorder3.q, \ + join_reorder4.q, \ + join_star.q, \ + join_thrift.q, \ + join_vc.q, \ + join_view.q, \ + leftsemijoin.q, \ + leftsemijoin_mr.q, \ + limit_pushdown.q, \ + load_dyn_part1.q, \ + load_dyn_part10.q, \ + load_dyn_part11.q, \ + load_dyn_part12.q, \ + load_dyn_part13.q, \ + load_dyn_part14.q, \ + load_dyn_part15.q, \ + load_dyn_part2.q, \ + load_dyn_part3.q, \ + load_dyn_part4.q, \ + load_dyn_part5.q, \ + load_dyn_part6.q, \ + load_dyn_part7.q, \ + load_dyn_part8.q, \ + load_dyn_part9.q, \ + louter_join_ppr.q, \ + mapjoin_addjar.q, \ + mapjoin_decimal.q, \ + mapjoin_distinct.q, \ + mapjoin_filter_on_outerjoin.q, \ + mapjoin_hook.q, \ + mapjoin_mapjoin.q, \ + mapjoin_memcheck.q, \ + mapjoin_subquery.q, \ + mapjoin_subquery2.q, \ + mapjoin_test_outer.q, \ + mapreduce1.q, \ + mapreduce2.q, \ + merge1.q, \ + merge2.q, \ + mergejoins.q, \ + mergejoins_mixed.q, \ + metadata_only_queries.q, \ + metadata_only_queries_with_filters.q, \ + multi_insert.q, \ + multi_insert_gby.q, \ + multi_insert_gby2.q, \ + multi_insert_gby3.q, \ + multi_insert_lateral_view.q, \ + multi_insert_mixed.q, \ + multi_insert_move_tasks_share_dependencies.q, \ + multi_join_union.q, \ + multigroupby_singlemr.q, \ + optimize_nullscan.q, \ + order.q, \ + order2.q, \ + outer_join_ppr.q, \ + parallel.q, \ parallel_join0.q, \ parallel_join1.q, \ - pcr.q \ - ppd_multi_insert.q \ - ppd_transform.q \ - ptf_decimal.q \ - ptf_general_queries.q \ - ptf_matchpath.q \ - ptf_rcfile.q \ - ptf_register_tblfn.q \ - ptf_seqfile.q \ - sample1.q \ - sample2.q \ - sample3.q \ - sample4.q \ - sample5.q \ - sample6.q \ - sample7.q \ - sample8.q \ - sample9.q \ - sample10.q \ - script_env_var1.q \ - script_env_var2.q \ - script_pipe.q \ - scriptfile1.q \ - smb_mapjoin_13.q \ - smb_mapjoin_15.q \ - smb_mapjoin_16.q \ - sort.q \ - spark_test.q \ - subquery_multiinsert.q \ - table_access_keys_stats.q \ - temp_table.q \ - timestamp_1.q \ - timestamp_2.q \ - timestamp_3.q \ - timestamp_comparison.q \ - timestamp_lazy.q \ - timestamp_null.q \ - timestamp_udf.q \ - transform_ppr1.q \ - transform_ppr2.q \ - transform1.q \ - transform2.q \ - union.q \ - union10.q \ - union11.q \ - union13.q \ - union14.q \ - union15.q \ - union16.q \ - union18.q \ - union19.q \ - union2.q \ - union23.q \ - union25.q \ - union28.q \ - union29.q \ - union3.q \ - union30.q \ - union33.q \ - union4.q \ - union5.q \ - union6.q \ - union7.q \ - union8.q \ - union9.q \ - union_null.q \ - union_ppr.q \ - union_remove_1.q \ - union_remove_10.q \ - union_remove_11.q \ - union_remove_15.q \ - union_remove_16.q \ - union_remove_17.q \ - union_remove_18.q \ - union_remove_19.q \ - union_remove_2.q \ - union_remove_20.q \ - union_remove_21.q \ - union_remove_24.q \ - union_remove_25.q \ - union_remove_3.q \ - union_remove_4.q \ - union_remove_5.q \ - union_remove_6.q \ - union_remove_7.q \ - union_remove_8.q \ - union_remove_9.q \ - load_dyn_part4.q \ - load_dyn_part5.q \ - load_dyn_part6.q \ - load_dyn_part7.q \ - load_dyn_part8.q \ - load_dyn_part9.q \ - load_dyn_part10.q \ - load_dyn_part11.q \ - load_dyn_part12.q \ - load_dyn_part13.q 
\ - load_dyn_part14.q \ - load_dyn_part15.q \ - vector_between_in.q \ - vector_cast_constant.q \ - vector_char_4.q \ - vector_count_distinct.q \ - vector_data_types.q \ - vector_decimal_aggregate.q \ - vector_elt.q \ - vector_left_outer_join.q \ - vector_orderby_5.q \ - vector_string_concat.q \ - vector_varchar_4.q \ - vectorization_0.q \ - vectorization_1.q \ - vectorization_10.q \ - vectorization_11.q \ - vectorization_12.q \ - vectorization_13.q \ - vectorization_14.q \ - vectorization_15.q \ - vectorization_2.q \ - vectorization_3.q \ - vectorization_4.q \ - vectorization_5.q \ - vectorization_6.q \ - vectorization_9.q \ - vectorization_decimal_date.q \ - vectorization_div0.q \ - vectorization_nested_udf.q \ - vectorization_not.q \ - vectorization_part.q \ - vectorization_part_project.q \ - vectorization_pushdown.q \ - vectorized_bucketmapjoin1.q \ - vectorized_case.q \ - vectorized_mapjoin.q \ + parquet_join.q, \ + pcr.q, \ + ppd_gby_join.q, \ + ppd_join.q, \ + ppd_join2.q, \ + ppd_join3.q, \ + ppd_join4.q, \ + ppd_join5.q, \ + ppd_join_filter.q, \ + ppd_multi_insert.q, \ + ppd_outer_join1.q, \ + ppd_outer_join2.q, \ + ppd_outer_join3.q, \ + ppd_outer_join4.q, \ + ppd_outer_join5.q, \ + ppd_transform.q, \ + ptf_decimal.q, \ + ptf_general_queries.q, \ + ptf_matchpath.q, \ + ptf_rcfile.q, \ + ptf_register_tblfn.q, \ + ptf_seqfile.q, \ + reduce_deduplicate_exclude_join.q, \ + router_join_ppr.q, \ + sample1.q, \ + sample10.q, \ + sample2.q, \ + sample3.q, \ + sample4.q, \ + sample5.q, \ + sample6.q, \ + sample7.q, \ + sample8.q, \ + sample9.q, \ + script_env_var1.q, \ + script_env_var2.q, \ + script_pipe.q, \ + scriptfile1.q, \ + semijoin.q, \ + skewjoin.q, \ + skewjoin_noskew.q, \ + skewjoin_union_remove_1.q, \ + skewjoin_union_remove_2.q, \ + skewjoinopt1.q, \ + skewjoinopt10.q, \ + skewjoinopt11.q, \ + skewjoinopt12.q, \ + skewjoinopt13.q, \ + skewjoinopt14.q, \ + skewjoinopt15.q, \ + skewjoinopt16.q, \ + skewjoinopt17.q, \ + skewjoinopt18.q, \ + skewjoinopt19.q, \ + skewjoinopt20.q, \ + skewjoinopt3.q, \ + skewjoinopt4.q, \ + skewjoinopt5.q, \ + skewjoinopt6.q, \ + skewjoinopt7.q, \ + skewjoinopt8.q, \ + skewjoinopt9.q, \ + smb_mapjoin9.q, \ + smb_mapjoin_1.q, \ + smb_mapjoin_10.q, \ + smb_mapjoin_13.q, \ + smb_mapjoin_14.q, \ + smb_mapjoin_15.q, \ + smb_mapjoin_16.q, \ + smb_mapjoin_17.q, \ + smb_mapjoin_18.q, \ + smb_mapjoin_19.q, \ + smb_mapjoin_2.q, \ + smb_mapjoin_20.q, \ + smb_mapjoin_21.q, \ + smb_mapjoin_22.q, \ + smb_mapjoin_25.q, \ + smb_mapjoin_3.q, \ + smb_mapjoin_4.q, \ + smb_mapjoin_5.q, \ + smb_mapjoin_6.q, \ + smb_mapjoin_7.q, \ + smb_mapjoin_8.q, \ + sort.q, \ + sort_merge_join_desc_1.q, \ + sort_merge_join_desc_2.q, \ + sort_merge_join_desc_3.q, \ + sort_merge_join_desc_4.q, \ + sort_merge_join_desc_5.q, \ + sort_merge_join_desc_6.q, \ + sort_merge_join_desc_7.q, \ + sort_merge_join_desc_8.q, \ + spark_test.q, \ + subquery_exists.q, \ + subquery_in.q, \ + subquery_multiinsert.q, \ + table_access_keys_stats.q, \ + temp_table.q, \ + temp_table_join1.q, \ + tez_join_tests.q, \ + tez_joins_explain.q, \ + timestamp_1.q, \ + timestamp_2.q, \ + timestamp_3.q, \ + timestamp_comparison.q, \ + timestamp_lazy.q, \ + timestamp_null.q, \ + timestamp_udf.q, \ + transform1.q, \ + transform2.q, \ + transform_ppr1.q, \ + transform_ppr2.q, \ + union.q, \ + union10.q, \ + union11.q, \ + union13.q, \ + union14.q, \ + union15.q, \ + union16.q, \ + union18.q, \ + union19.q, \ + union2.q, \ + union23.q, \ + union25.q, \ + union28.q, \ + union29.q, \ + union3.q, \ + union30.q, \ + 
union33.q, \ + union4.q, \ + union5.q, \ + union6.q, \ + union7.q, \ + union8.q, \ + union9.q, \ + union_null.q, \ + union_ppr.q, \ + union_remove_1.q, \ + union_remove_10.q, \ + union_remove_11.q, \ + union_remove_15.q, \ + union_remove_16.q, \ + union_remove_17.q, \ + union_remove_18.q, \ + union_remove_19.q, \ + union_remove_2.q, \ + union_remove_20.q, \ + union_remove_21.q, \ + union_remove_24.q, \ + union_remove_25.q, \ + union_remove_3.q, \ + union_remove_4.q, \ + union_remove_5.q, \ + union_remove_6.q, \ + union_remove_7.q, \ + union_remove_8.q, \ + union_remove_9.q, \ + uniquejoin.q, \ + varchar_join1.q, \ + vector_between_in.q, \ + vector_cast_constant.q, \ + vector_char_4.q, \ + vector_count_distinct.q, \ + vector_data_types.q, \ + vector_decimal_aggregate.q, \ + vector_elt.q, \ + vector_left_outer_join.q, \ + vector_orderby_5.q, \ + vector_string_concat.q, \ + vector_varchar_4.q, \ + vectorization_0.q, \ + vectorization_1.q, \ + vectorization_10.q, \ + vectorization_11.q, \ + vectorization_12.q, \ + vectorization_13.q, \ + vectorization_14.q, \ + vectorization_15.q, \ + vectorization_2.q, \ + vectorization_3.q, \ + vectorization_4.q, \ + vectorization_5.q, \ + vectorization_6.q, \ + vectorization_9.q, \ + vectorization_decimal_date.q, \ + vectorization_div0.q, \ + vectorization_nested_udf.q, \ + vectorization_not.q, \ + vectorization_part.q, \ + vectorization_part_project.q, \ + vectorization_pushdown.q, \ + vectorized_bucketmapjoin1.q, \ + vectorized_case.q, \ + vectorized_mapjoin.q, \ vectorized_math_funcs.q,\ - vectorized_nested_mapjoin.q \ - vectorized_ptf.q \ - vectorized_rcfile_columnar.q \ - vectorized_shufflejoin.q \ - vectorized_string_funcs.q \ - vectorized_timestamp_funcs.q \ - windowing.q \ - subquery_exists.q \ - subquery_in.q \ - auto_sortmerge_join_1.q \ - skewjoin.q + vectorized_nested_mapjoin.q, \ + vectorized_ptf.q, \ + vectorized_rcfile_columnar.q, \ + vectorized_shufflejoin.q, \ + vectorized_string_funcs.q, \ + vectorized_timestamp_funcs.q, \ + windowing.q, \ diff --git ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out new file mode 100644 index 0000000..a56ee53 --- /dev/null +++ ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out @@ -0,0 +1,720 @@ +PREHOOK: query: create table if not exists emp ( + lastname string, + deptid int, + locid int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emp +POSTHOOK: query: create table if not exists emp ( + lastname string, + deptid int, + locid int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emp +PREHOOK: query: create table if not exists dept ( + deptid int, + deptname string +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dept +POSTHOOK: query: create table if not exists dept ( + deptid int, + deptname string +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dept +PREHOOK: query: create table if not exists loc ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: 
database:default +PREHOOK: Output: default@loc +POSTHOOK: query: create table if not exists loc ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@loc +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@emp +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@emp +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@dept +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@dept +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@loc +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@loc +PREHOOK: query: analyze table emp compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@emp +PREHOOK: Output: default@emp +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: analyze table emp compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp +POSTHOOK: Output: default@emp +PREHOOK: query: analyze table dept compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@dept +PREHOOK: Output: default@dept +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: analyze table dept compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept +POSTHOOK: Output: default@dept +PREHOOK: query: analyze table loc compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@loc +PREHOOK: Output: default@loc +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: analyze table loc compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc +POSTHOOK: Output: default@loc +PREHOOK: query: analyze table emp compute statistics for columns lastname,deptid,locid +PREHOOK: type: QUERY +PREHOOK: Input: default@emp +#### A masked pattern was here #### +POSTHOOK: query: analyze table emp compute statistics for columns lastname,deptid,locid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp +#### A masked pattern was here #### +PREHOOK: query: analyze table dept compute statistics for columns deptname,deptid +PREHOOK: type: QUERY +PREHOOK: Input: default@dept +#### A masked pattern was here #### +POSTHOOK: query: analyze table dept compute statistics for columns deptname,deptid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept +#### A masked pattern was here #### +PREHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year 
+PREHOOK: type: QUERY +PREHOOK: Input: default@loc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc +#### A masked pattern was here #### +PREHOOK: query: -- number of rows +-- emp - 48 +-- dept - 6 +-- loc - 8 + +-- count distincts for relevant columns (since count distinct values are approximate in some cases count distint values will be greater than number of rows) +-- emp.deptid - 3 +-- emp.lastname - 6 +-- emp.locid - 7 +-- dept.deptid - 7 +-- dept.deptname - 6 +-- loc.locid - 7 +-- loc.state - 6 + +-- 2 relations, 1 attribute +-- Expected output rows: (48*6)/max(3,7) = 41 +explain select * from emp e join dept d on (e.deptid = d.deptid) +PREHOOK: type: QUERY +POSTHOOK: query: -- number of rows +-- emp - 48 +-- dept - 6 +-- loc - 8 + +-- count distincts for relevant columns (since count distinct values are approximate in some cases count distint values will be greater than number of rows) +-- emp.deptid - 3 +-- emp.lastname - 6 +-- emp.locid - 7 +-- dept.deptid - 7 +-- dept.deptname - 6 +-- loc.locid - 7 +-- loc.state - 6 + +-- 2 relations, 1 attribute +-- Expected output rows: (48*6)/max(3,7) = 41 +explain select * from emp e join dept d on (e.deptid = d.deptid) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 68 Basic stats: PARTIAL Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int) + sort order: + + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + value expressions: deptname (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 5 Data size: 495 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int) + sort order: + + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 5 Data size: 495 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: lastname (type: string), locid (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col6, _col7 + Statistics: Num rows: 5 Data size: 544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5 Data size: 544 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 544 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- 2 relations, 2 attributes +-- Expected output rows: (48*6)/(max(3,7) * max(6,6)) = 6 +explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname +PREHOOK: type: QUERY +POSTHOOK: query: -- 2 relations, 2 attributes +-- Expected output rows: (48*6)/(max(3,7) * max(6,6)) = 6 +explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dept + Statistics: Num rows: 0 Data size: 68 Basic stats: PARTIAL Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int), deptname (type: string) + sort order: ++ + Map-reduce partition columns: deptid (type: int), deptname (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Map 3 + Map Operator Tree: + TableScan + alias: emp + Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 5 Data size: 495 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int), lastname (type: string) + sort order: ++ + Map-reduce partition columns: deptid (type: int), lastname (type: string) + Statistics: Num rows: 5 Data size: 495 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: locid (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col2, _col6, _col7 + Statistics: Num rows: 5 Data size: 544 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 = _col6) and (_col0 = _col7)) (type: boolean) + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = 
d.deptname) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 68 Basic stats: PARTIAL Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int), deptname (type: string) + sort order: ++ + Map-reduce partition columns: deptid (type: int), deptname (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Map 3 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 5 Data size: 495 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int), lastname (type: string) + sort order: ++ + Map-reduce partition columns: deptid (type: int), lastname (type: string) + Statistics: Num rows: 5 Data size: 495 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: locid (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col2, _col6, _col7 + Statistics: Num rows: 5 Data size: 544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5 Data size: 544 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 544 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- 2 relations, 3 attributes +-- Expected output rows: (48*6)/(max(3,7) * max(6,6) * max(6,6)) = 1 +explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +PREHOOK: type: QUERY +POSTHOOK: query: -- 2 relations, 3 attributes +-- Expected output rows: (48*6)/(max(3,7) * max(6,6) * max(6,6)) = 1 +explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dept + Statistics: Num rows: 0 Data size: 68 Basic stats: PARTIAL Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is 
not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int), deptname (type: string), deptname (type: string) + sort order: +++ + Map-reduce partition columns: deptid (type: int), deptname (type: string), deptname (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Map 3 + Map Operator Tree: + TableScan + alias: emp + Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 5 Data size: 495 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int), lastname (type: string), lastname (type: string) + sort order: +++ + Map-reduce partition columns: deptid (type: int), lastname (type: string), lastname (type: string) + Statistics: Num rows: 5 Data size: 495 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: locid (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col2, _col6, _col7 + Statistics: Num rows: 5 Data size: 544 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col1 = _col6) and (_col0 = _col7)) and (_col7 = _col0)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- 3 relations, 1 attribute +-- Expected output rows: (48*6*48)/top2largest(3,7,3) = 658 +explain select * from emp e join dept d on (e.deptid = d.deptid) join emp e1 on (e.deptid = e1.deptid) +PREHOOK: type: QUERY +POSTHOOK: query: -- 3 relations, 1 attribute +-- Expected output rows: (48*6*48)/top2largest(3,7,3) = 658 +explain select * from emp e join dept d on (e.deptid = d.deptid) join emp e1 on (e.deptid = e1.deptid) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 68 Basic stats: PARTIAL Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int) + sort order: + + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: COMPLETE + value expressions: deptname (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: e1 + Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 5 Data size: 495 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int) + sort order: + + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 5 Data size: 495 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: lastname (type: string), locid (type: int) + Map 4 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 5 Data size: 495 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int) + sort order: + + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 5 Data size: 495 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: lastname (type: string), locid (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col11, _col12, _col13 + Statistics: Num rows: 11 Data size: 1089 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string), _col11 (type: string), _col12 (type: int), _col13 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 11 Data size: 1089 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 1089 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- Expected output rows: (48*6*8)/top2largest(3,7,7) = 47 +explain select * from emp e join dept d on (e.deptid = d.deptid) join loc l on (e.deptid = l.locid) +PREHOOK: type: QUERY +POSTHOOK: query: -- Expected output rows: (48*6*8)/top2largest(3,7,7) = 47 +explain select * from emp e join dept d on (e.deptid = d.deptid) join loc l on (e.deptid = l.locid) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 68 Basic stats: PARTIAL Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int) + sort order: + + Map-reduce partition columns: deptid (type: int) + Statistics: Num 
rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + value expressions: deptname (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 5 Data size: 495 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int) + sort order: + + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 5 Data size: 495 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: lastname (type: string), locid (type: int) + Map 4 + Map Operator Tree: + TableScan + alias: l + Statistics: Num rows: 1 Data size: 117 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: locid is not null (type: boolean) + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: locid (type: int) + sort order: + + Map-reduce partition columns: locid (type: int) + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: state (type: string), zip (type: bigint), year (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col11, _col12, _col13, _col14 + Statistics: Num rows: 11 Data size: 1089 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string), _col11 (type: string), _col12 (type: int), _col13 (type: bigint), _col14 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 11 Data size: 1089 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 1089 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- 3 relations and 2 attribute +-- Expected output rows: (48*6*8)/top2largest(3,7,7)*top2largest(6,6,6) = 1 +explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc l on (e.deptid = l.locid and e.lastname = l.state) +PREHOOK: type: QUERY +POSTHOOK: query: -- 3 relations and 2 attribute +-- Expected output rows: (48*6*8)/top2largest(3,7,7)*top2largest(6,6,6) = 1 +explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc l on (e.deptid = l.locid and e.lastname = l.state) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 68 Basic stats: PARTIAL Column 
stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int), deptname (type: string) + sort order: ++ + Map-reduce partition columns: deptid (type: int), deptname (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Map 3 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 5 Data size: 495 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int), lastname (type: string) + sort order: ++ + Map-reduce partition columns: deptid (type: int), lastname (type: string) + Statistics: Num rows: 5 Data size: 495 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: locid (type: int) + Map 4 + Map Operator Tree: + TableScan + alias: l + Statistics: Num rows: 1 Data size: 117 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid is not null and state is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: locid (type: int), state (type: string) + sort order: ++ + Map-reduce partition columns: locid (type: int), state (type: string) + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: zip (type: bigint), year (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 2 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col11, _col12, _col13, _col14 + Statistics: Num rows: 11 Data size: 1089 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string), _col11 (type: string), _col12 (type: int), _col13 (type: bigint), _col14 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 11 Data size: 1089 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 1089 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/auto_join0.q.out ql/src/test/results/clientpositive/spark/auto_join0.q.out new file mode 100644 index 0000000..76ff63d --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join0.q.out @@ -0,0 +1,150 @@ +PREHOOK: query: explain +select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +) a +PREHOOK: type: QUERY 
+POSTHOOK: query: explain +select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +) a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: 
sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +) a +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +34441656720 diff --git ql/src/test/results/clientpositive/spark/auto_join1.q.out ql/src/test/results/clientpositive/spark/auto_join1.q.out new file mode 100644 index 0000000..222fe6b --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join1.q.out @@ -0,0 +1,117 @@ +PREHOOK: query: CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j1 +PREHOOK: query: explain +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_j1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest_j1.key,dest_j1.value)) FROM dest_j1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest_j1.key,dest_j1.value)) FROM dest_j1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +101861029915 diff --git ql/src/test/results/clientpositive/spark/auto_join10.q.out ql/src/test/results/clientpositive/spark/auto_join10.q.out new file mode 100644 index 0000000..05a5912 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join10.q.out @@ -0,0 +1,130 @@ +PREHOOK: query: explain +FROM +(SELECT src.* FROM src) x +JOIN +(SELECT src.* FROM src) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT src.* FROM src) x +JOIN +(SELECT src.* FROM src) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map 
Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT src.* FROM src) x +JOIN +(SELECT src.* FROM src) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT src.* FROM src) x +JOIN +(SELECT src.* FROM src) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +103231310608 diff --git ql/src/test/results/clientpositive/spark/auto_join11.q.out 
ql/src/test/results/clientpositive/spark/auto_join11.q.out new file mode 100644 index 0000000..998c28b --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join11.q.out @@ -0,0 +1,130 @@ +PREHOOK: query: explain +SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 100) and key is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 100) and key is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col3 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: 
Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +-101333194320 diff --git ql/src/test/results/clientpositive/spark/auto_join12.q.out ql/src/test/results/clientpositive/spark/auto_join12.q.out new file mode 100644 index 0000000..d2b7993 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join12.q.out @@ -0,0 +1,161 @@ +PREHOOK: query: explain +SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +JOIN +(SELECT src.key as c5, src.value as c6 from src) src3 +ON src1.c1 = src3.c5 AND src3.c5 < 80 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +JOIN +(SELECT src.key as c5, src.value as c6 from src) src3 +ON src1.c1 = src3.c5 AND src3.c5 < 80 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 100) and key is not null) and (key < 80)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 80) and key is not null) and (key < 100)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + 
outputColumnNames: _col0 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 100) and key is not null) and (key < 80)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + 2 + outputColumnNames: _col0, _col3 + Statistics: Num rows: 59 Data size: 629 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 59 Data size: 629 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +JOIN +(SELECT src.key as c5, src.value as c6 from src) src3 +ON src1.c1 = src3.c5 AND src3.c5 < 80 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +JOIN +(SELECT src.key as c5, src.value as c6 from src) src3 +ON src1.c1 = src3.c5 AND src3.c5 < 80 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +-136843922952 diff --git ql/src/test/results/clientpositive/spark/auto_join13.q.out 
ql/src/test/results/clientpositive/spark/auto_join13.q.out new file mode 100644 index 0000000..78aa01e --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join13.q.out @@ -0,0 +1,179 @@ +PREHOOK: query: explain +SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +JOIN +(SELECT src.key as c5, src.value as c6 from src) src3 +ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 200 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +JOIN +(SELECT src.key as c5, src.value as c6 from src) src3 +ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 200 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 100) and key is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 100) and key is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 200) and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner 
Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col2) is not null (type: boolean) + Statistics: Num rows: 46 Data size: 489 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (_col0 + _col2) (type: double) + sort order: + + Map-reduce partition columns: (_col0 + _col2) (type: double) + Statistics: Num rows: 46 Data size: 489 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col3 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col3} + 1 + outputColumnNames: _col0, _col3 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +JOIN +(SELECT src.key as c5, src.value as c6 from src) src3 +ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 200 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(src1.c1, src2.c4)) +FROM +(SELECT src.key as c1, src.value as c2 from src) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src) src2 +ON src1.c1 = src2.c3 AND src1.c1 < 100 +JOIN +(SELECT src.key as c5, src.value as c6 from src) src3 +ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 200 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +-97670109576 diff --git ql/src/test/results/clientpositive/spark/auto_join14.q.out ql/src/test/results/clientpositive/spark/auto_join14.q.out new file mode 100644 index 0000000..e1e6a4b --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join14.q.out @@ -0,0 +1,127 @@ +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: 
database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: explain +FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 100) and key is not null) (type: boolean) + Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 100) and key is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: 
Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@dest1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2)) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2)) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +404554174174 diff --git ql/src/test/results/clientpositive/spark/auto_join15.q.out ql/src/test/results/clientpositive/spark/auto_join15.q.out new file mode 100644 index 0000000..5916070 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join15.q.out @@ -0,0 +1,134 @@ +PREHOOK: query: explain +select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +SORT BY k1, v1, k2, v2 +) a +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +SORT BY k1, v1, k2, v2 +) a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + 
condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +SORT BY k1, v1, k2, v2 +) a +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +SORT BY k1, v1, k2, v2 +) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +524272996896 diff --git ql/src/test/results/clientpositive/spark/auto_join16.q.out ql/src/test/results/clientpositive/spark/auto_join16.q.out new file mode 100644 index 0000000..0b6807d --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join16.q.out @@ -0,0 +1,125 @@ +PREHOOK: query: explain +SELECT sum(hash(subq.key, tab.value)) +FROM +(select a.key, a.value from src a where a.key > 10 ) subq +JOIN src tab +ON (subq.key = tab.key and subq.key > 20 and subq.value = tab.value) +where tab.value < 200 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT sum(hash(subq.key, tab.value)) +FROM +(select a.key, a.value from src a where a.key > 10 ) subq +JOIN src tab +ON (subq.key 
= tab.key and subq.key > 20 and subq.value = tab.value) +where tab.value < 200 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((key > 10) and (key > 20)) and key is not null) and value is not null) and (value < 200)) (type: boolean) + Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: tab + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((key > 20) and key is not null) and value is not null) and (value < 200)) (type: boolean) + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey1} + outputColumnNames: _col0, _col3 + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(subq.key, tab.value)) +FROM +(select a.key, a.value from src a where a.key > 10 ) subq +JOIN src tab 
+ON (subq.key = tab.key and subq.key > 20 and subq.value = tab.value) +where tab.value < 200 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(subq.key, tab.value)) +FROM +(select a.key, a.value from src a where a.key > 10 ) subq +JOIN src tab +ON (subq.key = tab.key and subq.key > 20 and subq.value = tab.value) +where tab.value < 200 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL diff --git ql/src/test/results/clientpositive/spark/auto_join17.q.out ql/src/test/results/clientpositive/spark/auto_join17.q.out new file mode 100644 index 0000000..8985dd4 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join17.q.out @@ -0,0 +1,120 @@ +PREHOOK: query: CREATE TABLE dest1(key1 INT, value1 STRING, key2 INT, value2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(key1 INT, value1 STRING, key2 INT, value2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: explain +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col5) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.key1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value1 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1.key1,dest1.value1,dest1.key2,dest1.value2)) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1.key1,dest1.value1,dest1.key2,dest1.value2)) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +-793937029770 diff --git ql/src/test/results/clientpositive/spark/auto_join18.q.out ql/src/test/results/clientpositive/spark/auto_join18.q.out new file mode 100644 index 0000000..6083b38 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join18.q.out @@ -0,0 +1,196 @@ +PREHOOK: query: explain + SELECT sum(hash(a.key, a.value, b.key, b.value)) + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain + SELECT sum(hash(a.key, a.value, b.key, b.value)) + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 25 Data size: 191 
Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT value) + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File 
Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(a.key, a.value, b.key, b.value)) + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key, a.value, b.key, b.value)) + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +379685492277 diff --git ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out new file mode 100644 index 0000000..01c8f0a --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out @@ -0,0 +1,200 @@ +PREHOOK: query: explain + SELECT sum(hash(a.key, a.value, b.key, b.value1, b.value2)) + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + count(distinct(src2.key)) AS value2 + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain + SELECT sum(hash(a.key, a.value, b.key, b.value1, b.value2)) + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + count(distinct(src2.key)) AS value2 + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT value), count(DISTINCT key) + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 
Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(a.key, a.value, b.key, b.value1, b.value2)) + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + count(distinct(src2.key)) AS value2 + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key, a.value, b.key, b.value1, b.value2)) + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + count(distinct(src2.key)) AS value2 + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +40694575227 diff --git ql/src/test/results/clientpositive/spark/auto_join19.q.out ql/src/test/results/clientpositive/spark/auto_join19.q.out new file mode 100644 index 0000000..974aec5 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join19.q.out @@ -0,0 +1,131 @@ +PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: explain +FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + 
Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col8 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@dest1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: 
Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +407444119660 diff --git ql/src/test/results/clientpositive/spark/auto_join2.q.out ql/src/test/results/clientpositive/spark/auto_join2.q.out new file mode 100644 index 0000000..49544a1 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join2.q.out @@ -0,0 +1,150 @@ +PREHOOK: query: CREATE TABLE dest_j2(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j2 +POSTHOOK: query: CREATE TABLE dest_j2(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j2 +PREHOOK: query: explain +FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) +INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) +INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter 
Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (_col0 + _col5) (type: double) + sort order: + + Map-reduce partition columns: (_col0 + _col5) (type: double) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col1} + outputColumnNames: _col0, _col11 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) +INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_j2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) +INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_j2 +POSTHOOK: Lineage: dest_j2.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.value SIMPLE [(src)src3.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest_j2.key,dest_j2.value)) FROM dest_j2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest_j2.key,dest_j2.value)) FROM dest_j2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j2 +#### A masked pattern was here #### +33815990627 
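The auto_join2.q.out plan above is representative of the chained-shuffle shape that recurs throughout these Spark golden files: an equi-join on src1.key = src2.key feeds a second join whose key is the computed expression (src1.key + src2.key), so the Spark work has Reducer 3 downstream of Reducer 2 instead of a single reduce stage. A minimal sketch of that query shape, taken from the test itself and assuming only the stock Hive test table src(key STRING, value STRING):

  -- Hedged sketch: the two-stage join shape shown in the plan above.
  -- Assumes the standard Hive test table src(key STRING, value STRING);
  -- the stage comments are a reading of the EXPLAIN output, not Hive docs.
  CREATE TABLE dest_j2 (key INT, value STRING) STORED AS TEXTFILE;

  FROM src src1
  JOIN src src2 ON (src1.key = src2.key)               -- first shuffle: join on key
  JOIN src src3 ON (src1.key + src2.key = src3.key)    -- second shuffle: join on the computed key
  INSERT OVERWRITE TABLE dest_j2
  SELECT src1.key, src3.value;

The recurring "[Error 30017]: Skipping stats aggregation ..." lines are part of the captured golden output, not a test failure: with counter-based statistics (hive.stats.dbclass=counter), the stats aggregator appears not yet to be wired up on the Spark execution path at this point, so the Stats-Aggr stage logs the error and the INSERT itself still completes.
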
diff --git ql/src/test/results/clientpositive/spark/auto_join20.q.out ql/src/test/results/clientpositive/spark/auto_join20.q.out new file mode 100644 index 0000000..a8f2b9a --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join20.q.out @@ -0,0 +1,302 @@ +PREHOOK: query: explain +select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 +FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) +SORT BY k1,v1,k2,v2,k3,v3 +)a +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 +FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) +SORT BY k1,v1,k2,v2,k3,v3 +)a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 + 1 + 2 {(KEY.reducesinkkey0 < 20)} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 +FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) +SORT BY k1,v1,k2,v2,k3,v3 +)a +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 +FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) +SORT BY k1,v1,k2,v2,k3,v3 +)a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +56157587016 +PREHOOK: query: explain +select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 +FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key < 15) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) +SORT BY k1,v1,k2,v2,k3,v3 +)a +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 +FROM src src1 JOIN src src2 ON (src1.key = src2.key 
AND src1.key < 10 AND src2.key < 15) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) +SORT BY k1,v1,k2,v2,k3,v3 +)a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 10) and (key < 15)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 15) and (key < 10)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 + 1 + 2 {(KEY.reducesinkkey0 < 20)} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group 
By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 +FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key < 15) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) +SORT BY k1,v1,k2,v2,k3,v3 +)a +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(a.k1,a.v1,a.k2,a.v2,a.k3,a.v3)) +from ( +SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 +FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key < 15) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) +SORT BY k1,v1,k2,v2,k3,v3 +)a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +56157587016 diff --git ql/src/test/results/clientpositive/spark/auto_join21.q.out ql/src/test/results/clientpositive/spark/auto_join21.q.out new file mode 100644 index 0000000..f9ac35d --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join21.q.out @@ -0,0 +1,606 @@ +PREHOOK: query: explain +SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 {(KEY.reducesinkkey0 < 10)} + 1 + 2 {(KEY.reducesinkkey0 < 10)} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND 
src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL NULL NULL NULL 0 val_0 +NULL NULL NULL NULL 0 val_0 +NULL NULL NULL NULL 0 val_0 +NULL NULL NULL NULL 10 val_10 +NULL NULL NULL NULL 100 val_100 +NULL NULL NULL NULL 100 val_100 +NULL NULL NULL NULL 103 val_103 +NULL NULL NULL NULL 103 val_103 +NULL NULL NULL NULL 104 val_104 +NULL NULL NULL NULL 104 val_104 +NULL NULL NULL NULL 105 val_105 +NULL NULL NULL NULL 11 val_11 +NULL NULL NULL NULL 111 val_111 +NULL NULL NULL NULL 113 val_113 +NULL NULL NULL NULL 113 val_113 +NULL NULL NULL NULL 114 val_114 +NULL NULL NULL NULL 116 val_116 +NULL NULL NULL NULL 118 val_118 +NULL NULL NULL NULL 118 val_118 +NULL NULL NULL NULL 119 val_119 +NULL NULL NULL NULL 119 val_119 +NULL NULL NULL NULL 119 val_119 +NULL NULL NULL NULL 12 val_12 +NULL NULL NULL NULL 12 val_12 +NULL NULL NULL NULL 120 val_120 +NULL NULL NULL NULL 120 val_120 +NULL NULL NULL NULL 125 val_125 +NULL NULL NULL NULL 125 val_125 +NULL NULL NULL NULL 126 val_126 +NULL NULL NULL NULL 128 val_128 +NULL NULL NULL NULL 128 val_128 +NULL NULL NULL NULL 128 val_128 +NULL NULL NULL NULL 129 val_129 +NULL NULL NULL NULL 129 val_129 +NULL NULL NULL NULL 131 val_131 +NULL NULL NULL NULL 133 val_133 +NULL NULL NULL NULL 134 val_134 +NULL NULL NULL NULL 134 val_134 +NULL NULL NULL NULL 136 val_136 +NULL NULL NULL NULL 137 val_137 +NULL NULL NULL NULL 137 val_137 +NULL NULL NULL NULL 138 val_138 +NULL NULL NULL NULL 138 val_138 +NULL NULL NULL NULL 138 val_138 +NULL NULL NULL NULL 138 val_138 +NULL NULL NULL NULL 143 val_143 +NULL NULL NULL NULL 145 val_145 +NULL NULL NULL NULL 146 val_146 +NULL NULL NULL NULL 146 val_146 +NULL NULL NULL NULL 149 val_149 +NULL NULL NULL NULL 149 val_149 +NULL NULL NULL NULL 15 val_15 +NULL NULL NULL NULL 15 val_15 +NULL NULL NULL NULL 150 val_150 +NULL NULL NULL NULL 152 val_152 +NULL NULL NULL NULL 152 val_152 +NULL NULL NULL NULL 153 val_153 +NULL NULL NULL NULL 155 val_155 +NULL NULL NULL NULL 156 val_156 +NULL NULL NULL NULL 157 val_157 +NULL NULL NULL NULL 158 val_158 +NULL NULL NULL NULL 160 val_160 +NULL NULL NULL NULL 162 val_162 +NULL NULL NULL NULL 163 val_163 +NULL NULL NULL NULL 164 val_164 +NULL NULL NULL NULL 164 val_164 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 166 val_166 +NULL NULL NULL NULL 167 val_167 +NULL NULL NULL NULL 167 val_167 +NULL NULL NULL NULL 167 val_167 +NULL NULL NULL NULL 168 val_168 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 17 val_17 +NULL NULL NULL NULL 170 val_170 +NULL NULL NULL NULL 172 val_172 +NULL NULL NULL NULL 172 val_172 +NULL NULL NULL NULL 174 val_174 +NULL NULL NULL NULL 174 val_174 +NULL NULL NULL NULL 175 val_175 +NULL NULL NULL NULL 175 val_175 +NULL NULL NULL NULL 176 val_176 +NULL NULL NULL NULL 176 val_176 +NULL NULL NULL NULL 177 val_177 +NULL NULL NULL NULL 178 val_178 +NULL NULL NULL NULL 179 val_179 +NULL NULL NULL NULL 179 val_179 +NULL NULL NULL NULL 18 val_18 +NULL NULL NULL NULL 18 val_18 +NULL NULL NULL NULL 180 val_180 +NULL NULL NULL NULL 181 val_181 +NULL NULL NULL NULL 183 val_183 +NULL NULL NULL NULL 186 val_186 +NULL NULL NULL NULL 187 val_187 +NULL NULL NULL NULL 187 val_187 +NULL NULL NULL NULL 187 val_187 +NULL NULL NULL NULL 189 val_189 +NULL NULL NULL NULL 19 val_19 +NULL NULL NULL NULL 190 val_190 +NULL 
NULL NULL NULL 191 val_191 +NULL NULL NULL NULL 191 val_191 +NULL NULL NULL NULL 192 val_192 +NULL NULL NULL NULL 193 val_193 +NULL NULL NULL NULL 193 val_193 +NULL NULL NULL NULL 193 val_193 +NULL NULL NULL NULL 194 val_194 +NULL NULL NULL NULL 195 val_195 +NULL NULL NULL NULL 195 val_195 +NULL NULL NULL NULL 196 val_196 +NULL NULL NULL NULL 197 val_197 +NULL NULL NULL NULL 197 val_197 +NULL NULL NULL NULL 199 val_199 +NULL NULL NULL NULL 199 val_199 +NULL NULL NULL NULL 199 val_199 +NULL NULL NULL NULL 2 val_2 +NULL NULL NULL NULL 20 val_20 +NULL NULL NULL NULL 200 val_200 +NULL NULL NULL NULL 200 val_200 +NULL NULL NULL NULL 201 val_201 +NULL NULL NULL NULL 202 val_202 +NULL NULL NULL NULL 203 val_203 +NULL NULL NULL NULL 203 val_203 +NULL NULL NULL NULL 205 val_205 +NULL NULL NULL NULL 205 val_205 +NULL NULL NULL NULL 207 val_207 +NULL NULL NULL NULL 207 val_207 +NULL NULL NULL NULL 208 val_208 +NULL NULL NULL NULL 208 val_208 +NULL NULL NULL NULL 208 val_208 +NULL NULL NULL NULL 209 val_209 +NULL NULL NULL NULL 209 val_209 +NULL NULL NULL NULL 213 val_213 +NULL NULL NULL NULL 213 val_213 +NULL NULL NULL NULL 214 val_214 +NULL NULL NULL NULL 216 val_216 +NULL NULL NULL NULL 216 val_216 +NULL NULL NULL NULL 217 val_217 +NULL NULL NULL NULL 217 val_217 +NULL NULL NULL NULL 218 val_218 +NULL NULL NULL NULL 219 val_219 +NULL NULL NULL NULL 219 val_219 +NULL NULL NULL NULL 221 val_221 +NULL NULL NULL NULL 221 val_221 +NULL NULL NULL NULL 222 val_222 +NULL NULL NULL NULL 223 val_223 +NULL NULL NULL NULL 223 val_223 +NULL NULL NULL NULL 224 val_224 +NULL NULL NULL NULL 224 val_224 +NULL NULL NULL NULL 226 val_226 +NULL NULL NULL NULL 228 val_228 +NULL NULL NULL NULL 229 val_229 +NULL NULL NULL NULL 229 val_229 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 233 val_233 +NULL NULL NULL NULL 233 val_233 +NULL NULL NULL NULL 235 val_235 +NULL NULL NULL NULL 237 val_237 +NULL NULL NULL NULL 237 val_237 +NULL NULL NULL NULL 238 val_238 +NULL NULL NULL NULL 238 val_238 +NULL NULL NULL NULL 239 val_239 +NULL NULL NULL NULL 239 val_239 +NULL NULL NULL NULL 24 val_24 +NULL NULL NULL NULL 24 val_24 +NULL NULL NULL NULL 241 val_241 +NULL NULL NULL NULL 242 val_242 +NULL NULL NULL NULL 242 val_242 +NULL NULL NULL NULL 244 val_244 +NULL NULL NULL NULL 247 val_247 +NULL NULL NULL NULL 248 val_248 +NULL NULL NULL NULL 249 val_249 +NULL NULL NULL NULL 252 val_252 +NULL NULL NULL NULL 255 val_255 +NULL NULL NULL NULL 255 val_255 +NULL NULL NULL NULL 256 val_256 +NULL NULL NULL NULL 256 val_256 +NULL NULL NULL NULL 257 val_257 +NULL NULL NULL NULL 258 val_258 +NULL NULL NULL NULL 26 val_26 +NULL NULL NULL NULL 26 val_26 +NULL NULL NULL NULL 260 val_260 +NULL NULL NULL NULL 262 val_262 +NULL NULL NULL NULL 263 val_263 +NULL NULL NULL NULL 265 val_265 +NULL NULL NULL NULL 265 val_265 +NULL NULL NULL NULL 266 val_266 +NULL NULL NULL NULL 27 val_27 +NULL NULL NULL NULL 272 val_272 +NULL NULL NULL NULL 272 val_272 +NULL NULL NULL NULL 273 val_273 +NULL NULL NULL NULL 273 val_273 +NULL NULL NULL NULL 273 val_273 +NULL NULL NULL NULL 274 val_274 +NULL NULL NULL NULL 275 val_275 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 278 val_278 +NULL NULL NULL NULL 278 val_278 +NULL NULL NULL NULL 28 val_28 +NULL NULL NULL NULL 280 val_280 +NULL NULL NULL NULL 280 val_280 +NULL NULL 
NULL NULL 281 val_281 +NULL NULL NULL NULL 281 val_281 +NULL NULL NULL NULL 282 val_282 +NULL NULL NULL NULL 282 val_282 +NULL NULL NULL NULL 283 val_283 +NULL NULL NULL NULL 284 val_284 +NULL NULL NULL NULL 285 val_285 +NULL NULL NULL NULL 286 val_286 +NULL NULL NULL NULL 287 val_287 +NULL NULL NULL NULL 288 val_288 +NULL NULL NULL NULL 288 val_288 +NULL NULL NULL NULL 289 val_289 +NULL NULL NULL NULL 291 val_291 +NULL NULL NULL NULL 292 val_292 +NULL NULL NULL NULL 296 val_296 +NULL NULL NULL NULL 298 val_298 +NULL NULL NULL NULL 298 val_298 +NULL NULL NULL NULL 298 val_298 +NULL NULL NULL NULL 30 val_30 +NULL NULL NULL NULL 302 val_302 +NULL NULL NULL NULL 305 val_305 +NULL NULL NULL NULL 306 val_306 +NULL NULL NULL NULL 307 val_307 +NULL NULL NULL NULL 307 val_307 +NULL NULL NULL NULL 308 val_308 +NULL NULL NULL NULL 309 val_309 +NULL NULL NULL NULL 309 val_309 +NULL NULL NULL NULL 310 val_310 +NULL NULL NULL NULL 311 val_311 +NULL NULL NULL NULL 311 val_311 +NULL NULL NULL NULL 311 val_311 +NULL NULL NULL NULL 315 val_315 +NULL NULL NULL NULL 316 val_316 +NULL NULL NULL NULL 316 val_316 +NULL NULL NULL NULL 316 val_316 +NULL NULL NULL NULL 317 val_317 +NULL NULL NULL NULL 317 val_317 +NULL NULL NULL NULL 318 val_318 +NULL NULL NULL NULL 318 val_318 +NULL NULL NULL NULL 318 val_318 +NULL NULL NULL NULL 321 val_321 +NULL NULL NULL NULL 321 val_321 +NULL NULL NULL NULL 322 val_322 +NULL NULL NULL NULL 322 val_322 +NULL NULL NULL NULL 323 val_323 +NULL NULL NULL NULL 325 val_325 +NULL NULL NULL NULL 325 val_325 +NULL NULL NULL NULL 327 val_327 +NULL NULL NULL NULL 327 val_327 +NULL NULL NULL NULL 327 val_327 +NULL NULL NULL NULL 33 val_33 +NULL NULL NULL NULL 331 val_331 +NULL NULL NULL NULL 331 val_331 +NULL NULL NULL NULL 332 val_332 +NULL NULL NULL NULL 333 val_333 +NULL NULL NULL NULL 333 val_333 +NULL NULL NULL NULL 335 val_335 +NULL NULL NULL NULL 336 val_336 +NULL NULL NULL NULL 338 val_338 +NULL NULL NULL NULL 339 val_339 +NULL NULL NULL NULL 34 val_34 +NULL NULL NULL NULL 341 val_341 +NULL NULL NULL NULL 342 val_342 +NULL NULL NULL NULL 342 val_342 +NULL NULL NULL NULL 344 val_344 +NULL NULL NULL NULL 344 val_344 +NULL NULL NULL NULL 345 val_345 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 35 val_35 +NULL NULL NULL NULL 35 val_35 +NULL NULL NULL NULL 35 val_35 +NULL NULL NULL NULL 351 val_351 +NULL NULL NULL NULL 353 val_353 +NULL NULL NULL NULL 353 val_353 +NULL NULL NULL NULL 356 val_356 +NULL NULL NULL NULL 360 val_360 +NULL NULL NULL NULL 362 val_362 +NULL NULL NULL NULL 364 val_364 +NULL NULL NULL NULL 365 val_365 +NULL NULL NULL NULL 366 val_366 +NULL NULL NULL NULL 367 val_367 +NULL NULL NULL NULL 367 val_367 +NULL NULL NULL NULL 368 val_368 +NULL NULL NULL NULL 369 val_369 +NULL NULL NULL NULL 369 val_369 +NULL NULL NULL NULL 369 val_369 +NULL NULL NULL NULL 37 val_37 +NULL NULL NULL NULL 37 val_37 +NULL NULL NULL NULL 373 val_373 +NULL NULL NULL NULL 374 val_374 +NULL NULL NULL NULL 375 val_375 +NULL NULL NULL NULL 377 val_377 +NULL NULL NULL NULL 378 val_378 +NULL NULL NULL NULL 379 val_379 +NULL NULL NULL NULL 382 val_382 +NULL NULL NULL NULL 382 val_382 +NULL NULL NULL NULL 384 val_384 +NULL NULL NULL NULL 384 val_384 +NULL NULL NULL NULL 384 val_384 +NULL NULL NULL NULL 386 val_386 +NULL NULL NULL NULL 389 val_389 +NULL NULL NULL NULL 392 val_392 +NULL NULL NULL NULL 393 val_393 +NULL NULL NULL NULL 394 val_394 +NULL NULL NULL 
NULL 395 val_395 +NULL NULL NULL NULL 395 val_395 +NULL NULL NULL NULL 396 val_396 +NULL NULL NULL NULL 396 val_396 +NULL NULL NULL NULL 396 val_396 +NULL NULL NULL NULL 397 val_397 +NULL NULL NULL NULL 397 val_397 +NULL NULL NULL NULL 399 val_399 +NULL NULL NULL NULL 399 val_399 +NULL NULL NULL NULL 4 val_4 +NULL NULL NULL NULL 400 val_400 +NULL NULL NULL NULL 401 val_401 +NULL NULL NULL NULL 401 val_401 +NULL NULL NULL NULL 401 val_401 +NULL NULL NULL NULL 401 val_401 +NULL NULL NULL NULL 401 val_401 +NULL NULL NULL NULL 402 val_402 +NULL NULL NULL NULL 403 val_403 +NULL NULL NULL NULL 403 val_403 +NULL NULL NULL NULL 403 val_403 +NULL NULL NULL NULL 404 val_404 +NULL NULL NULL NULL 404 val_404 +NULL NULL NULL NULL 406 val_406 +NULL NULL NULL NULL 406 val_406 +NULL NULL NULL NULL 406 val_406 +NULL NULL NULL NULL 406 val_406 +NULL NULL NULL NULL 407 val_407 +NULL NULL NULL NULL 409 val_409 +NULL NULL NULL NULL 409 val_409 +NULL NULL NULL NULL 409 val_409 +NULL NULL NULL NULL 41 val_41 +NULL NULL NULL NULL 411 val_411 +NULL NULL NULL NULL 413 val_413 +NULL NULL NULL NULL 413 val_413 +NULL NULL NULL NULL 414 val_414 +NULL NULL NULL NULL 414 val_414 +NULL NULL NULL NULL 417 val_417 +NULL NULL NULL NULL 417 val_417 +NULL NULL NULL NULL 417 val_417 +NULL NULL NULL NULL 418 val_418 +NULL NULL NULL NULL 419 val_419 +NULL NULL NULL NULL 42 val_42 +NULL NULL NULL NULL 42 val_42 +NULL NULL NULL NULL 421 val_421 +NULL NULL NULL NULL 424 val_424 +NULL NULL NULL NULL 424 val_424 +NULL NULL NULL NULL 427 val_427 +NULL NULL NULL NULL 429 val_429 +NULL NULL NULL NULL 429 val_429 +NULL NULL NULL NULL 43 val_43 +NULL NULL NULL NULL 430 val_430 +NULL NULL NULL NULL 430 val_430 +NULL NULL NULL NULL 430 val_430 +NULL NULL NULL NULL 431 val_431 +NULL NULL NULL NULL 431 val_431 +NULL NULL NULL NULL 431 val_431 +NULL NULL NULL NULL 432 val_432 +NULL NULL NULL NULL 435 val_435 +NULL NULL NULL NULL 436 val_436 +NULL NULL NULL NULL 437 val_437 +NULL NULL NULL NULL 438 val_438 +NULL NULL NULL NULL 438 val_438 +NULL NULL NULL NULL 438 val_438 +NULL NULL NULL NULL 439 val_439 +NULL NULL NULL NULL 439 val_439 +NULL NULL NULL NULL 44 val_44 +NULL NULL NULL NULL 443 val_443 +NULL NULL NULL NULL 444 val_444 +NULL NULL NULL NULL 446 val_446 +NULL NULL NULL NULL 448 val_448 +NULL NULL NULL NULL 449 val_449 +NULL NULL NULL NULL 452 val_452 +NULL NULL NULL NULL 453 val_453 +NULL NULL NULL NULL 454 val_454 +NULL NULL NULL NULL 454 val_454 +NULL NULL NULL NULL 454 val_454 +NULL NULL NULL NULL 455 val_455 +NULL NULL NULL NULL 457 val_457 +NULL NULL NULL NULL 458 val_458 +NULL NULL NULL NULL 458 val_458 +NULL NULL NULL NULL 459 val_459 +NULL NULL NULL NULL 459 val_459 +NULL NULL NULL NULL 460 val_460 +NULL NULL NULL NULL 462 val_462 +NULL NULL NULL NULL 462 val_462 +NULL NULL NULL NULL 463 val_463 +NULL NULL NULL NULL 463 val_463 +NULL NULL NULL NULL 466 val_466 +NULL NULL NULL NULL 466 val_466 +NULL NULL NULL NULL 466 val_466 +NULL NULL NULL NULL 467 val_467 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 47 val_47 +NULL NULL NULL NULL 470 val_470 +NULL NULL NULL NULL 472 val_472 +NULL NULL NULL NULL 475 val_475 +NULL NULL NULL NULL 477 val_477 +NULL NULL NULL NULL 478 val_478 +NULL NULL NULL NULL 478 val_478 +NULL NULL NULL NULL 479 val_479 +NULL NULL NULL NULL 
480 val_480 +NULL NULL NULL NULL 480 val_480 +NULL NULL NULL NULL 480 val_480 +NULL NULL NULL NULL 481 val_481 +NULL NULL NULL NULL 482 val_482 +NULL NULL NULL NULL 483 val_483 +NULL NULL NULL NULL 484 val_484 +NULL NULL NULL NULL 485 val_485 +NULL NULL NULL NULL 487 val_487 +NULL NULL NULL NULL 489 val_489 +NULL NULL NULL NULL 489 val_489 +NULL NULL NULL NULL 489 val_489 +NULL NULL NULL NULL 489 val_489 +NULL NULL NULL NULL 490 val_490 +NULL NULL NULL NULL 491 val_491 +NULL NULL NULL NULL 492 val_492 +NULL NULL NULL NULL 492 val_492 +NULL NULL NULL NULL 493 val_493 +NULL NULL NULL NULL 494 val_494 +NULL NULL NULL NULL 495 val_495 +NULL NULL NULL NULL 496 val_496 +NULL NULL NULL NULL 497 val_497 +NULL NULL NULL NULL 498 val_498 +NULL NULL NULL NULL 498 val_498 +NULL NULL NULL NULL 498 val_498 +NULL NULL NULL NULL 5 val_5 +NULL NULL NULL NULL 5 val_5 +NULL NULL NULL NULL 5 val_5 +NULL NULL NULL NULL 51 val_51 +NULL NULL NULL NULL 51 val_51 +NULL NULL NULL NULL 53 val_53 +NULL NULL NULL NULL 54 val_54 +NULL NULL NULL NULL 57 val_57 +NULL NULL NULL NULL 58 val_58 +NULL NULL NULL NULL 58 val_58 +NULL NULL NULL NULL 64 val_64 +NULL NULL NULL NULL 65 val_65 +NULL NULL NULL NULL 66 val_66 +NULL NULL NULL NULL 67 val_67 +NULL NULL NULL NULL 67 val_67 +NULL NULL NULL NULL 69 val_69 +NULL NULL NULL NULL 70 val_70 +NULL NULL NULL NULL 70 val_70 +NULL NULL NULL NULL 70 val_70 +NULL NULL NULL NULL 72 val_72 +NULL NULL NULL NULL 72 val_72 +NULL NULL NULL NULL 74 val_74 +NULL NULL NULL NULL 76 val_76 +NULL NULL NULL NULL 76 val_76 +NULL NULL NULL NULL 77 val_77 +NULL NULL NULL NULL 78 val_78 +NULL NULL NULL NULL 8 val_8 +NULL NULL NULL NULL 80 val_80 +NULL NULL NULL NULL 82 val_82 +NULL NULL NULL NULL 83 val_83 +NULL NULL NULL NULL 83 val_83 +NULL NULL NULL NULL 84 val_84 +NULL NULL NULL NULL 84 val_84 +NULL NULL NULL NULL 85 val_85 +NULL NULL NULL NULL 86 val_86 +NULL NULL NULL NULL 87 val_87 +NULL NULL NULL NULL 9 val_9 +NULL NULL NULL NULL 90 val_90 +NULL NULL NULL NULL 90 val_90 +NULL NULL NULL NULL 90 val_90 +NULL NULL NULL NULL 92 val_92 +NULL NULL NULL NULL 95 val_95 +NULL NULL NULL NULL 95 val_95 +NULL NULL NULL NULL 96 val_96 +NULL NULL NULL NULL 97 val_97 +NULL NULL NULL NULL 97 val_97 +NULL NULL NULL NULL 98 val_98 +NULL NULL NULL NULL 98 val_98 diff --git ql/src/test/results/clientpositive/spark/auto_join22.q.out ql/src/test/results/clientpositive/spark/auto_join22.q.out new file mode 100644 index 0000000..516322c --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join22.q.out @@ -0,0 +1,136 @@ +PREHOOK: query: explain +SELECT sum(hash(src5.src1_value)) FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key FROM src src4 JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value FROM src src1 JOIN src src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT sum(hash(src5.src1_value)) FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key FROM src src4 JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value FROM src src1 JOIN src src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 
1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: src4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {VALUE._col2} + outputColumnNames: _col8 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col8 (type: string) + outputColumnNames: _col3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(src5.src1_value)) FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key FROM src src4 JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value FROM src src1 JOIN src src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(src5.src1_value)) FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key FROM src src4 JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value FROM src src1 JOIN src src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +344337359100 diff --git ql/src/test/results/clientpositive/spark/auto_join23.q.out ql/src/test/results/clientpositive/spark/auto_join23.q.out new file mode 100644 index 0000000..ce5a670 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join23.q.out @@ -0,0 +1,188 @@ +PREHOOK: query: explain +SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator 
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 2 val_2 +0 val_0 2 val_2 +0 val_0 2 val_2 +0 val_0 4 val_4 +0 val_0 4 val_4 +0 val_0 4 val_4 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 5 val_5 +0 val_0 8 val_8 +0 val_0 8 val_8 +0 val_0 8 val_8 +0 val_0 9 val_9 +0 val_0 9 val_9 +0 val_0 9 val_9 +2 val_2 0 val_0 +2 val_2 0 val_0 +2 val_2 0 val_0 +2 val_2 2 val_2 +2 val_2 4 val_4 +2 val_2 5 val_5 +2 val_2 5 val_5 +2 val_2 5 val_5 +2 val_2 8 val_8 +2 val_2 9 val_9 +4 val_4 0 val_0 +4 val_4 0 val_0 +4 val_4 0 val_0 +4 val_4 2 val_2 +4 val_4 4 val_4 +4 val_4 5 val_5 +4 val_4 5 val_5 +4 val_4 5 val_5 +4 val_4 8 val_8 +4 val_4 9 val_9 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 2 val_2 +5 val_5 2 val_2 +5 val_5 2 val_2 +5 val_5 4 val_4 +5 val_5 4 val_4 +5 val_5 4 val_4 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 8 val_8 +5 val_5 8 val_8 +5 val_5 8 val_8 +5 val_5 9 val_9 +5 val_5 9 val_9 +5 val_5 9 val_9 +8 val_8 0 val_0 +8 val_8 0 val_0 +8 val_8 0 val_0 +8 val_8 2 val_2 +8 val_8 4 val_4 +8 val_8 5 val_5 +8 val_8 5 val_5 +8 val_8 5 val_5 +8 val_8 8 val_8 +8 val_8 9 val_9 +9 val_9 0 val_0 +9 val_9 0 val_0 +9 val_9 0 val_0 +9 val_9 2 val_2 +9 val_9 4 val_4 +9 val_9 5 val_5 +9 val_9 5 val_5 +9 val_9 5 val_5 +9 val_9 8 val_8 +9 val_9 9 val_9 diff --git ql/src/test/results/clientpositive/spark/auto_join24.q.out ql/src/test/results/clientpositive/spark/auto_join24.q.out new file mode 100644 index 0000000..15b8888 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join24.q.out @@ -0,0 +1,123 @@ +PREHOOK: query: create table tst1(key STRING, cnt INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tst1 +POSTHOOK: 
query: create table tst1(key STRING, cnt INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tst1 +PREHOOK: query: INSERT OVERWRITE TABLE tst1 +SELECT a.key, count(1) FROM src a group by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tst1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE tst1 +SELECT a.key, count(1) FROM src a group by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tst1 +POSTHOOK: Lineage: tst1.cnt EXPRESSION [(src)a.null, ] +POSTHOOK: Lineage: tst1.key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: explain +SELECT sum(a.cnt) FROM tst1 a JOIN tst1 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT sum(a.cnt) FROM tst1 a JOIN tst1 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1791 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 9 Data size: 948 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 9 Data size: 948 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 17 Data size: 1791 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 9 Data size: 948 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 9 Data size: 948 Basic stats: COMPLETE Column stats: NONE + value expressions: cnt (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col1 + Statistics: Num rows: 9 Data size: 1042 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 9 Data size: 1042 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num 
rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(a.cnt) FROM tst1 a JOIN tst1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tst1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(a.cnt) FROM tst1 a JOIN tst1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tst1 +#### A masked pattern was here #### +500 diff --git ql/src/test/results/clientpositive/spark/auto_join25.q.out ql/src/test/results/clientpositive/spark/auto_join25.q.out new file mode 100644 index 0000000..f0f45cb --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join25.q.out @@ -0,0 +1,122 @@ +PREHOOK: query: -- This test tests the scenario when the mapper dies. So, create a conditional task for the mapjoin +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: -- This test tests the scenario when the mapper dies. So, create a conditional task for the mapjoin +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +RUN: Stage-0:DDL +PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@dest1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +RUN: Stage-1:MAPRED +RUN: Stage-2:DEPENDENCY_COLLECTION +RUN: Stage-0:MOVE +RUN: Stage-3:STATS +PREHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +RUN: Stage-1:MAPRED +407444119660 +PREHOOK: query: CREATE TABLE dest_j2(key INT, value STRING) STORED AS 
TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j2 +POSTHOOK: query: CREATE TABLE dest_j2(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j2 +RUN: Stage-0:DDL +PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) +INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_j2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) +INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_j2 +POSTHOOK: Lineage: dest_j2.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.value SIMPLE [(src)src3.FieldSchema(name:value, type:string, comment:default), ] +RUN: Stage-1:MAPRED +RUN: Stage-2:DEPENDENCY_COLLECTION +RUN: Stage-0:MOVE +RUN: Stage-3:STATS +PREHOOK: query: SELECT sum(hash(dest_j2.key,dest_j2.value)) FROM dest_j2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest_j2.key,dest_j2.value)) FROM dest_j2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j2 +#### A masked pattern was here #### +RUN: Stage-1:MAPRED +33815990627 +PREHOOK: query: CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: CREATE TABLE dest_j1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j1 +RUN: Stage-0:DDL +PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_j1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +RUN: Stage-1:MAPRED +RUN: Stage-2:DEPENDENCY_COLLECTION +RUN: Stage-0:MOVE +RUN: Stage-3:STATS +PREHOOK: query: SELECT sum(hash(dest_j1.key,dest_j1.value)) FROM dest_j1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest_j1.key,dest_j1.value)) FROM dest_j1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +RUN: Stage-1:MAPRED +101861029915 diff --git ql/src/test/results/clientpositive/spark/auto_join27.q.out ql/src/test/results/clientpositive/spark/auto_join27.q.out new file mode 100644 index 0000000..67f5739 --- /dev/null +++ 
ql/src/test/results/clientpositive/spark/auto_join27.q.out @@ -0,0 +1,177 @@ +PREHOOK: query: explain +SELECT count(1) +FROM +( +SELECT src.key, src.value from src +UNION ALL +SELECT DISTINCT src.key, src.value from src +) src_12 +JOIN +(SELECT src.key as k, src.value as v from src) src3 +ON src_12.key = src3.k AND src3.k < 200 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT count(1) +FROM +( +SELECT src.key, src.value from src +UNION ALL +SELECT DISTINCT src.key, src.value from src +) src_12 +JOIN +(SELECT src.key as k, src.value as v from src) src3 +ON src_12.key = src3.k AND src3.k < 200 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 1), Union 3 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) + Union 3 <- Map 6 (NONE, 0), Reducer 2 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 200) and key is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: ((key < 200) and key is not null) (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 200) and key is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 136 Data size: 
1447 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 136 Data size: 1447 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(1) +FROM +( +SELECT src.key, src.value from src +UNION ALL +SELECT DISTINCT src.key, src.value from src +) src_12 +JOIN +(SELECT src.key as k, src.value as v from src) src3 +ON src_12.key = src3.k AND src3.k < 200 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(1) +FROM +( +SELECT src.key, src.value from src +UNION ALL +SELECT DISTINCT src.key, src.value from src +) src_12 +JOIN +(SELECT src.key as k, src.value as v from src) src3 +ON src_12.key = src3.k AND src3.k < 200 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +548 diff --git ql/src/test/results/clientpositive/spark/auto_join28.q.out ql/src/test/results/clientpositive/spark/auto_join28.q.out new file mode 100644 index 0000000..b979661 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join28.q.out @@ -0,0 +1,398 @@ +PREHOOK: query: explain +SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + 
Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 {(KEY.reducesinkkey0 < 10)} + 1 + 2 {(KEY.reducesinkkey0 < 10)} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + 
Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 10)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 {(KEY.reducesinkkey0 < 10)} + 1 + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 > 10)} + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 
(type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter 
predicates: + 0 + 1 {(KEY.reducesinkkey0 > 10)} + 2 {(KEY.reducesinkkey0 < 10)} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/auto_join29.q.out ql/src/test/results/clientpositive/spark/auto_join29.q.out new file mode 100644 index 0000000..0951b8d --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join29.q.out @@ -0,0 +1,3522 @@ +PREHOOK: query: explain +SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 {(KEY.reducesinkkey0 < 10)} + 1 + 2 {(KEY.reducesinkkey0 < 10)} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL NULL NULL NULL 0 val_0 +NULL NULL NULL NULL 0 val_0 +NULL NULL NULL NULL 0 val_0 +NULL NULL NULL NULL 10 val_10 +NULL NULL NULL NULL 100 val_100 +NULL NULL NULL NULL 100 val_100 +NULL NULL NULL NULL 103 val_103 +NULL NULL 
NULL NULL 103 val_103 +NULL NULL NULL NULL 104 val_104 +NULL NULL NULL NULL 104 val_104 +NULL NULL NULL NULL 105 val_105 +NULL NULL NULL NULL 11 val_11 +NULL NULL NULL NULL 111 val_111 +NULL NULL NULL NULL 113 val_113 +NULL NULL NULL NULL 113 val_113 +NULL NULL NULL NULL 114 val_114 +NULL NULL NULL NULL 116 val_116 +NULL NULL NULL NULL 118 val_118 +NULL NULL NULL NULL 118 val_118 +NULL NULL NULL NULL 119 val_119 +NULL NULL NULL NULL 119 val_119 +NULL NULL NULL NULL 119 val_119 +NULL NULL NULL NULL 12 val_12 +NULL NULL NULL NULL 12 val_12 +NULL NULL NULL NULL 120 val_120 +NULL NULL NULL NULL 120 val_120 +NULL NULL NULL NULL 125 val_125 +NULL NULL NULL NULL 125 val_125 +NULL NULL NULL NULL 126 val_126 +NULL NULL NULL NULL 128 val_128 +NULL NULL NULL NULL 128 val_128 +NULL NULL NULL NULL 128 val_128 +NULL NULL NULL NULL 129 val_129 +NULL NULL NULL NULL 129 val_129 +NULL NULL NULL NULL 131 val_131 +NULL NULL NULL NULL 133 val_133 +NULL NULL NULL NULL 134 val_134 +NULL NULL NULL NULL 134 val_134 +NULL NULL NULL NULL 136 val_136 +NULL NULL NULL NULL 137 val_137 +NULL NULL NULL NULL 137 val_137 +NULL NULL NULL NULL 138 val_138 +NULL NULL NULL NULL 138 val_138 +NULL NULL NULL NULL 138 val_138 +NULL NULL NULL NULL 138 val_138 +NULL NULL NULL NULL 143 val_143 +NULL NULL NULL NULL 145 val_145 +NULL NULL NULL NULL 146 val_146 +NULL NULL NULL NULL 146 val_146 +NULL NULL NULL NULL 149 val_149 +NULL NULL NULL NULL 149 val_149 +NULL NULL NULL NULL 15 val_15 +NULL NULL NULL NULL 15 val_15 +NULL NULL NULL NULL 150 val_150 +NULL NULL NULL NULL 152 val_152 +NULL NULL NULL NULL 152 val_152 +NULL NULL NULL NULL 153 val_153 +NULL NULL NULL NULL 155 val_155 +NULL NULL NULL NULL 156 val_156 +NULL NULL NULL NULL 157 val_157 +NULL NULL NULL NULL 158 val_158 +NULL NULL NULL NULL 160 val_160 +NULL NULL NULL NULL 162 val_162 +NULL NULL NULL NULL 163 val_163 +NULL NULL NULL NULL 164 val_164 +NULL NULL NULL NULL 164 val_164 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 166 val_166 +NULL NULL NULL NULL 167 val_167 +NULL NULL NULL NULL 167 val_167 +NULL NULL NULL NULL 167 val_167 +NULL NULL NULL NULL 168 val_168 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 17 val_17 +NULL NULL NULL NULL 170 val_170 +NULL NULL NULL NULL 172 val_172 +NULL NULL NULL NULL 172 val_172 +NULL NULL NULL NULL 174 val_174 +NULL NULL NULL NULL 174 val_174 +NULL NULL NULL NULL 175 val_175 +NULL NULL NULL NULL 175 val_175 +NULL NULL NULL NULL 176 val_176 +NULL NULL NULL NULL 176 val_176 +NULL NULL NULL NULL 177 val_177 +NULL NULL NULL NULL 178 val_178 +NULL NULL NULL NULL 179 val_179 +NULL NULL NULL NULL 179 val_179 +NULL NULL NULL NULL 18 val_18 +NULL NULL NULL NULL 18 val_18 +NULL NULL NULL NULL 180 val_180 +NULL NULL NULL NULL 181 val_181 +NULL NULL NULL NULL 183 val_183 +NULL NULL NULL NULL 186 val_186 +NULL NULL NULL NULL 187 val_187 +NULL NULL NULL NULL 187 val_187 +NULL NULL NULL NULL 187 val_187 +NULL NULL NULL NULL 189 val_189 +NULL NULL NULL NULL 19 val_19 +NULL NULL NULL NULL 190 val_190 +NULL NULL NULL NULL 191 val_191 +NULL NULL NULL NULL 191 val_191 +NULL NULL NULL NULL 192 val_192 +NULL NULL NULL NULL 193 val_193 +NULL NULL NULL NULL 193 val_193 +NULL NULL NULL NULL 193 val_193 +NULL NULL NULL NULL 194 val_194 +NULL NULL NULL NULL 195 val_195 +NULL NULL NULL NULL 195 val_195 +NULL NULL NULL NULL 196 val_196 +NULL NULL NULL NULL 197 val_197 +NULL NULL NULL NULL 197 val_197 +NULL NULL NULL 
NULL 199 val_199 +NULL NULL NULL NULL 199 val_199 +NULL NULL NULL NULL 199 val_199 +NULL NULL NULL NULL 2 val_2 +NULL NULL NULL NULL 20 val_20 +NULL NULL NULL NULL 200 val_200 +NULL NULL NULL NULL 200 val_200 +NULL NULL NULL NULL 201 val_201 +NULL NULL NULL NULL 202 val_202 +NULL NULL NULL NULL 203 val_203 +NULL NULL NULL NULL 203 val_203 +NULL NULL NULL NULL 205 val_205 +NULL NULL NULL NULL 205 val_205 +NULL NULL NULL NULL 207 val_207 +NULL NULL NULL NULL 207 val_207 +NULL NULL NULL NULL 208 val_208 +NULL NULL NULL NULL 208 val_208 +NULL NULL NULL NULL 208 val_208 +NULL NULL NULL NULL 209 val_209 +NULL NULL NULL NULL 209 val_209 +NULL NULL NULL NULL 213 val_213 +NULL NULL NULL NULL 213 val_213 +NULL NULL NULL NULL 214 val_214 +NULL NULL NULL NULL 216 val_216 +NULL NULL NULL NULL 216 val_216 +NULL NULL NULL NULL 217 val_217 +NULL NULL NULL NULL 217 val_217 +NULL NULL NULL NULL 218 val_218 +NULL NULL NULL NULL 219 val_219 +NULL NULL NULL NULL 219 val_219 +NULL NULL NULL NULL 221 val_221 +NULL NULL NULL NULL 221 val_221 +NULL NULL NULL NULL 222 val_222 +NULL NULL NULL NULL 223 val_223 +NULL NULL NULL NULL 223 val_223 +NULL NULL NULL NULL 224 val_224 +NULL NULL NULL NULL 224 val_224 +NULL NULL NULL NULL 226 val_226 +NULL NULL NULL NULL 228 val_228 +NULL NULL NULL NULL 229 val_229 +NULL NULL NULL NULL 229 val_229 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 233 val_233 +NULL NULL NULL NULL 233 val_233 +NULL NULL NULL NULL 235 val_235 +NULL NULL NULL NULL 237 val_237 +NULL NULL NULL NULL 237 val_237 +NULL NULL NULL NULL 238 val_238 +NULL NULL NULL NULL 238 val_238 +NULL NULL NULL NULL 239 val_239 +NULL NULL NULL NULL 239 val_239 +NULL NULL NULL NULL 24 val_24 +NULL NULL NULL NULL 24 val_24 +NULL NULL NULL NULL 241 val_241 +NULL NULL NULL NULL 242 val_242 +NULL NULL NULL NULL 242 val_242 +NULL NULL NULL NULL 244 val_244 +NULL NULL NULL NULL 247 val_247 +NULL NULL NULL NULL 248 val_248 +NULL NULL NULL NULL 249 val_249 +NULL NULL NULL NULL 252 val_252 +NULL NULL NULL NULL 255 val_255 +NULL NULL NULL NULL 255 val_255 +NULL NULL NULL NULL 256 val_256 +NULL NULL NULL NULL 256 val_256 +NULL NULL NULL NULL 257 val_257 +NULL NULL NULL NULL 258 val_258 +NULL NULL NULL NULL 26 val_26 +NULL NULL NULL NULL 26 val_26 +NULL NULL NULL NULL 260 val_260 +NULL NULL NULL NULL 262 val_262 +NULL NULL NULL NULL 263 val_263 +NULL NULL NULL NULL 265 val_265 +NULL NULL NULL NULL 265 val_265 +NULL NULL NULL NULL 266 val_266 +NULL NULL NULL NULL 27 val_27 +NULL NULL NULL NULL 272 val_272 +NULL NULL NULL NULL 272 val_272 +NULL NULL NULL NULL 273 val_273 +NULL NULL NULL NULL 273 val_273 +NULL NULL NULL NULL 273 val_273 +NULL NULL NULL NULL 274 val_274 +NULL NULL NULL NULL 275 val_275 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 278 val_278 +NULL NULL NULL NULL 278 val_278 +NULL NULL NULL NULL 28 val_28 +NULL NULL NULL NULL 280 val_280 +NULL NULL NULL NULL 280 val_280 +NULL NULL NULL NULL 281 val_281 +NULL NULL NULL NULL 281 val_281 +NULL NULL NULL NULL 282 val_282 +NULL NULL NULL NULL 282 val_282 +NULL NULL NULL NULL 283 val_283 +NULL NULL NULL NULL 284 val_284 +NULL NULL NULL NULL 285 val_285 +NULL NULL NULL NULL 286 val_286 +NULL NULL NULL NULL 287 val_287 +NULL NULL NULL NULL 288 val_288 +NULL NULL NULL NULL 288 val_288 +NULL NULL NULL NULL 289 val_289 +NULL NULL NULL NULL 291 
val_291 +NULL NULL NULL NULL 292 val_292 +NULL NULL NULL NULL 296 val_296 +NULL NULL NULL NULL 298 val_298 +NULL NULL NULL NULL 298 val_298 +NULL NULL NULL NULL 298 val_298 +NULL NULL NULL NULL 30 val_30 +NULL NULL NULL NULL 302 val_302 +NULL NULL NULL NULL 305 val_305 +NULL NULL NULL NULL 306 val_306 +NULL NULL NULL NULL 307 val_307 +NULL NULL NULL NULL 307 val_307 +NULL NULL NULL NULL 308 val_308 +NULL NULL NULL NULL 309 val_309 +NULL NULL NULL NULL 309 val_309 +NULL NULL NULL NULL 310 val_310 +NULL NULL NULL NULL 311 val_311 +NULL NULL NULL NULL 311 val_311 +NULL NULL NULL NULL 311 val_311 +NULL NULL NULL NULL 315 val_315 +NULL NULL NULL NULL 316 val_316 +NULL NULL NULL NULL 316 val_316 +NULL NULL NULL NULL 316 val_316 +NULL NULL NULL NULL 317 val_317 +NULL NULL NULL NULL 317 val_317 +NULL NULL NULL NULL 318 val_318 +NULL NULL NULL NULL 318 val_318 +NULL NULL NULL NULL 318 val_318 +NULL NULL NULL NULL 321 val_321 +NULL NULL NULL NULL 321 val_321 +NULL NULL NULL NULL 322 val_322 +NULL NULL NULL NULL 322 val_322 +NULL NULL NULL NULL 323 val_323 +NULL NULL NULL NULL 325 val_325 +NULL NULL NULL NULL 325 val_325 +NULL NULL NULL NULL 327 val_327 +NULL NULL NULL NULL 327 val_327 +NULL NULL NULL NULL 327 val_327 +NULL NULL NULL NULL 33 val_33 +NULL NULL NULL NULL 331 val_331 +NULL NULL NULL NULL 331 val_331 +NULL NULL NULL NULL 332 val_332 +NULL NULL NULL NULL 333 val_333 +NULL NULL NULL NULL 333 val_333 +NULL NULL NULL NULL 335 val_335 +NULL NULL NULL NULL 336 val_336 +NULL NULL NULL NULL 338 val_338 +NULL NULL NULL NULL 339 val_339 +NULL NULL NULL NULL 34 val_34 +NULL NULL NULL NULL 341 val_341 +NULL NULL NULL NULL 342 val_342 +NULL NULL NULL NULL 342 val_342 +NULL NULL NULL NULL 344 val_344 +NULL NULL NULL NULL 344 val_344 +NULL NULL NULL NULL 345 val_345 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 35 val_35 +NULL NULL NULL NULL 35 val_35 +NULL NULL NULL NULL 35 val_35 +NULL NULL NULL NULL 351 val_351 +NULL NULL NULL NULL 353 val_353 +NULL NULL NULL NULL 353 val_353 +NULL NULL NULL NULL 356 val_356 +NULL NULL NULL NULL 360 val_360 +NULL NULL NULL NULL 362 val_362 +NULL NULL NULL NULL 364 val_364 +NULL NULL NULL NULL 365 val_365 +NULL NULL NULL NULL 366 val_366 +NULL NULL NULL NULL 367 val_367 +NULL NULL NULL NULL 367 val_367 +NULL NULL NULL NULL 368 val_368 +NULL NULL NULL NULL 369 val_369 +NULL NULL NULL NULL 369 val_369 +NULL NULL NULL NULL 369 val_369 +NULL NULL NULL NULL 37 val_37 +NULL NULL NULL NULL 37 val_37 +NULL NULL NULL NULL 373 val_373 +NULL NULL NULL NULL 374 val_374 +NULL NULL NULL NULL 375 val_375 +NULL NULL NULL NULL 377 val_377 +NULL NULL NULL NULL 378 val_378 +NULL NULL NULL NULL 379 val_379 +NULL NULL NULL NULL 382 val_382 +NULL NULL NULL NULL 382 val_382 +NULL NULL NULL NULL 384 val_384 +NULL NULL NULL NULL 384 val_384 +NULL NULL NULL NULL 384 val_384 +NULL NULL NULL NULL 386 val_386 +NULL NULL NULL NULL 389 val_389 +NULL NULL NULL NULL 392 val_392 +NULL NULL NULL NULL 393 val_393 +NULL NULL NULL NULL 394 val_394 +NULL NULL NULL NULL 395 val_395 +NULL NULL NULL NULL 395 val_395 +NULL NULL NULL NULL 396 val_396 +NULL NULL NULL NULL 396 val_396 +NULL NULL NULL NULL 396 val_396 +NULL NULL NULL NULL 397 val_397 +NULL NULL NULL NULL 397 val_397 +NULL NULL NULL NULL 399 val_399 +NULL NULL NULL NULL 399 val_399 +NULL NULL NULL NULL 4 val_4 +NULL NULL NULL NULL 400 val_400 +NULL NULL NULL NULL 401 val_401 +NULL NULL NULL NULL 401 val_401 
+NULL NULL NULL NULL 401 val_401 +NULL NULL NULL NULL 401 val_401 +NULL NULL NULL NULL 401 val_401 +NULL NULL NULL NULL 402 val_402 +NULL NULL NULL NULL 403 val_403 +NULL NULL NULL NULL 403 val_403 +NULL NULL NULL NULL 403 val_403 +NULL NULL NULL NULL 404 val_404 +NULL NULL NULL NULL 404 val_404 +NULL NULL NULL NULL 406 val_406 +NULL NULL NULL NULL 406 val_406 +NULL NULL NULL NULL 406 val_406 +NULL NULL NULL NULL 406 val_406 +NULL NULL NULL NULL 407 val_407 +NULL NULL NULL NULL 409 val_409 +NULL NULL NULL NULL 409 val_409 +NULL NULL NULL NULL 409 val_409 +NULL NULL NULL NULL 41 val_41 +NULL NULL NULL NULL 411 val_411 +NULL NULL NULL NULL 413 val_413 +NULL NULL NULL NULL 413 val_413 +NULL NULL NULL NULL 414 val_414 +NULL NULL NULL NULL 414 val_414 +NULL NULL NULL NULL 417 val_417 +NULL NULL NULL NULL 417 val_417 +NULL NULL NULL NULL 417 val_417 +NULL NULL NULL NULL 418 val_418 +NULL NULL NULL NULL 419 val_419 +NULL NULL NULL NULL 42 val_42 +NULL NULL NULL NULL 42 val_42 +NULL NULL NULL NULL 421 val_421 +NULL NULL NULL NULL 424 val_424 +NULL NULL NULL NULL 424 val_424 +NULL NULL NULL NULL 427 val_427 +NULL NULL NULL NULL 429 val_429 +NULL NULL NULL NULL 429 val_429 +NULL NULL NULL NULL 43 val_43 +NULL NULL NULL NULL 430 val_430 +NULL NULL NULL NULL 430 val_430 +NULL NULL NULL NULL 430 val_430 +NULL NULL NULL NULL 431 val_431 +NULL NULL NULL NULL 431 val_431 +NULL NULL NULL NULL 431 val_431 +NULL NULL NULL NULL 432 val_432 +NULL NULL NULL NULL 435 val_435 +NULL NULL NULL NULL 436 val_436 +NULL NULL NULL NULL 437 val_437 +NULL NULL NULL NULL 438 val_438 +NULL NULL NULL NULL 438 val_438 +NULL NULL NULL NULL 438 val_438 +NULL NULL NULL NULL 439 val_439 +NULL NULL NULL NULL 439 val_439 +NULL NULL NULL NULL 44 val_44 +NULL NULL NULL NULL 443 val_443 +NULL NULL NULL NULL 444 val_444 +NULL NULL NULL NULL 446 val_446 +NULL NULL NULL NULL 448 val_448 +NULL NULL NULL NULL 449 val_449 +NULL NULL NULL NULL 452 val_452 +NULL NULL NULL NULL 453 val_453 +NULL NULL NULL NULL 454 val_454 +NULL NULL NULL NULL 454 val_454 +NULL NULL NULL NULL 454 val_454 +NULL NULL NULL NULL 455 val_455 +NULL NULL NULL NULL 457 val_457 +NULL NULL NULL NULL 458 val_458 +NULL NULL NULL NULL 458 val_458 +NULL NULL NULL NULL 459 val_459 +NULL NULL NULL NULL 459 val_459 +NULL NULL NULL NULL 460 val_460 +NULL NULL NULL NULL 462 val_462 +NULL NULL NULL NULL 462 val_462 +NULL NULL NULL NULL 463 val_463 +NULL NULL NULL NULL 463 val_463 +NULL NULL NULL NULL 466 val_466 +NULL NULL NULL NULL 466 val_466 +NULL NULL NULL NULL 466 val_466 +NULL NULL NULL NULL 467 val_467 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 47 val_47 +NULL NULL NULL NULL 470 val_470 +NULL NULL NULL NULL 472 val_472 +NULL NULL NULL NULL 475 val_475 +NULL NULL NULL NULL 477 val_477 +NULL NULL NULL NULL 478 val_478 +NULL NULL NULL NULL 478 val_478 +NULL NULL NULL NULL 479 val_479 +NULL NULL NULL NULL 480 val_480 +NULL NULL NULL NULL 480 val_480 +NULL NULL NULL NULL 480 val_480 +NULL NULL NULL NULL 481 val_481 +NULL NULL NULL NULL 482 val_482 +NULL NULL NULL NULL 483 val_483 +NULL NULL NULL NULL 484 val_484 +NULL NULL NULL NULL 485 val_485 +NULL NULL NULL NULL 487 val_487 +NULL NULL NULL NULL 489 val_489 +NULL NULL NULL NULL 489 val_489 +NULL NULL NULL NULL 489 val_489 +NULL NULL NULL NULL 489 val_489 
+NULL NULL NULL NULL 490 val_490 +NULL NULL NULL NULL 491 val_491 +NULL NULL NULL NULL 492 val_492 +NULL NULL NULL NULL 492 val_492 +NULL NULL NULL NULL 493 val_493 +NULL NULL NULL NULL 494 val_494 +NULL NULL NULL NULL 495 val_495 +NULL NULL NULL NULL 496 val_496 +NULL NULL NULL NULL 497 val_497 +NULL NULL NULL NULL 498 val_498 +NULL NULL NULL NULL 498 val_498 +NULL NULL NULL NULL 498 val_498 +NULL NULL NULL NULL 5 val_5 +NULL NULL NULL NULL 5 val_5 +NULL NULL NULL NULL 5 val_5 +NULL NULL NULL NULL 51 val_51 +NULL NULL NULL NULL 51 val_51 +NULL NULL NULL NULL 53 val_53 +NULL NULL NULL NULL 54 val_54 +NULL NULL NULL NULL 57 val_57 +NULL NULL NULL NULL 58 val_58 +NULL NULL NULL NULL 58 val_58 +NULL NULL NULL NULL 64 val_64 +NULL NULL NULL NULL 65 val_65 +NULL NULL NULL NULL 66 val_66 +NULL NULL NULL NULL 67 val_67 +NULL NULL NULL NULL 67 val_67 +NULL NULL NULL NULL 69 val_69 +NULL NULL NULL NULL 70 val_70 +NULL NULL NULL NULL 70 val_70 +NULL NULL NULL NULL 70 val_70 +NULL NULL NULL NULL 72 val_72 +NULL NULL NULL NULL 72 val_72 +NULL NULL NULL NULL 74 val_74 +NULL NULL NULL NULL 76 val_76 +NULL NULL NULL NULL 76 val_76 +NULL NULL NULL NULL 77 val_77 +NULL NULL NULL NULL 78 val_78 +NULL NULL NULL NULL 8 val_8 +NULL NULL NULL NULL 80 val_80 +NULL NULL NULL NULL 82 val_82 +NULL NULL NULL NULL 83 val_83 +NULL NULL NULL NULL 83 val_83 +NULL NULL NULL NULL 84 val_84 +NULL NULL NULL NULL 84 val_84 +NULL NULL NULL NULL 85 val_85 +NULL NULL NULL NULL 86 val_86 +NULL NULL NULL NULL 87 val_87 +NULL NULL NULL NULL 9 val_9 +NULL NULL NULL NULL 90 val_90 +NULL NULL NULL NULL 90 val_90 +NULL NULL NULL NULL 90 val_90 +NULL NULL NULL NULL 92 val_92 +NULL NULL NULL NULL 95 val_95 +NULL NULL NULL NULL 95 val_95 +NULL NULL NULL NULL 96 val_96 +NULL NULL NULL NULL 97 val_97 +NULL NULL NULL NULL 97 val_97 +NULL NULL NULL NULL 98 val_98 +NULL NULL NULL NULL 98 val_98 +PREHOOK: query: explain +SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and 
(key < 10)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 {(KEY.reducesinkkey0 < 10)} + 1 + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 NULL NULL NULL NULL +0 val_0 NULL NULL NULL NULL +0 val_0 NULL NULL NULL NULL +10 val_10 NULL NULL NULL NULL +100 val_100 NULL NULL NULL NULL +100 val_100 NULL 
NULL NULL NULL +103 val_103 NULL NULL NULL NULL +103 val_103 NULL NULL NULL NULL +104 val_104 NULL NULL NULL NULL +104 val_104 NULL NULL NULL NULL +105 val_105 NULL NULL NULL NULL +11 val_11 NULL NULL NULL NULL +111 val_111 NULL NULL NULL NULL +113 val_113 NULL NULL NULL NULL +113 val_113 NULL NULL NULL NULL +114 val_114 NULL NULL NULL NULL +116 val_116 NULL NULL NULL NULL +118 val_118 NULL NULL NULL NULL +118 val_118 NULL NULL NULL NULL +119 val_119 NULL NULL NULL NULL +119 val_119 NULL NULL NULL NULL +119 val_119 NULL NULL NULL NULL +12 val_12 NULL NULL NULL NULL +12 val_12 NULL NULL NULL NULL +120 val_120 NULL NULL NULL NULL +120 val_120 NULL NULL NULL NULL +125 val_125 NULL NULL NULL NULL +125 val_125 NULL NULL NULL NULL +126 val_126 NULL NULL NULL NULL +128 val_128 NULL NULL NULL NULL +128 val_128 NULL NULL NULL NULL +128 val_128 NULL NULL NULL NULL +129 val_129 NULL NULL NULL NULL +129 val_129 NULL NULL NULL NULL +131 val_131 NULL NULL NULL NULL +133 val_133 NULL NULL NULL NULL +134 val_134 NULL NULL NULL NULL +134 val_134 NULL NULL NULL NULL +136 val_136 NULL NULL NULL NULL +137 val_137 NULL NULL NULL NULL +137 val_137 NULL NULL NULL NULL +138 val_138 NULL NULL NULL NULL +138 val_138 NULL NULL NULL NULL +138 val_138 NULL NULL NULL NULL +138 val_138 NULL NULL NULL NULL +143 val_143 NULL NULL NULL NULL +145 val_145 NULL NULL NULL NULL +146 val_146 NULL NULL NULL NULL +146 val_146 NULL NULL NULL NULL +149 val_149 NULL NULL NULL NULL +149 val_149 NULL NULL NULL NULL +15 val_15 NULL NULL NULL NULL +15 val_15 NULL NULL NULL NULL +150 val_150 NULL NULL NULL NULL +152 val_152 NULL NULL NULL NULL +152 val_152 NULL NULL NULL NULL +153 val_153 NULL NULL NULL NULL +155 val_155 NULL NULL NULL NULL +156 val_156 NULL NULL NULL NULL +157 val_157 NULL NULL NULL NULL +158 val_158 NULL NULL NULL NULL +160 val_160 NULL NULL NULL NULL +162 val_162 NULL NULL NULL NULL +163 val_163 NULL NULL NULL NULL +164 val_164 NULL NULL NULL NULL +164 val_164 NULL NULL NULL NULL +165 val_165 NULL NULL NULL NULL +165 val_165 NULL NULL NULL NULL +166 val_166 NULL NULL NULL NULL +167 val_167 NULL NULL NULL NULL +167 val_167 NULL NULL NULL NULL +167 val_167 NULL NULL NULL NULL +168 val_168 NULL NULL NULL NULL +169 val_169 NULL NULL NULL NULL +169 val_169 NULL NULL NULL NULL +169 val_169 NULL NULL NULL NULL +169 val_169 NULL NULL NULL NULL +17 val_17 NULL NULL NULL NULL +170 val_170 NULL NULL NULL NULL +172 val_172 NULL NULL NULL NULL +172 val_172 NULL NULL NULL NULL +174 val_174 NULL NULL NULL NULL +174 val_174 NULL NULL NULL NULL +175 val_175 NULL NULL NULL NULL +175 val_175 NULL NULL NULL NULL +176 val_176 NULL NULL NULL NULL +176 val_176 NULL NULL NULL NULL +177 val_177 NULL NULL NULL NULL +178 val_178 NULL NULL NULL NULL +179 val_179 NULL NULL NULL NULL +179 val_179 NULL NULL NULL NULL +18 val_18 NULL NULL NULL NULL +18 val_18 NULL NULL NULL NULL +180 val_180 NULL NULL NULL NULL +181 val_181 NULL NULL NULL NULL +183 val_183 NULL NULL NULL NULL +186 val_186 NULL NULL NULL NULL +187 val_187 NULL NULL NULL NULL +187 val_187 NULL NULL NULL NULL +187 val_187 NULL NULL NULL NULL +189 val_189 NULL NULL NULL NULL +19 val_19 NULL NULL NULL NULL +190 val_190 NULL NULL NULL NULL +191 val_191 NULL NULL NULL NULL +191 val_191 NULL NULL NULL NULL +192 val_192 NULL NULL NULL NULL +193 val_193 NULL NULL NULL NULL +193 val_193 NULL NULL NULL NULL +193 val_193 NULL NULL NULL NULL +194 val_194 NULL NULL NULL NULL +195 val_195 NULL NULL NULL NULL +195 val_195 NULL NULL NULL NULL +196 val_196 NULL NULL NULL NULL +197 val_197 NULL NULL 
NULL NULL +197 val_197 NULL NULL NULL NULL +199 val_199 NULL NULL NULL NULL +199 val_199 NULL NULL NULL NULL +199 val_199 NULL NULL NULL NULL +2 val_2 NULL NULL NULL NULL +20 val_20 NULL NULL NULL NULL +200 val_200 NULL NULL NULL NULL +200 val_200 NULL NULL NULL NULL +201 val_201 NULL NULL NULL NULL +202 val_202 NULL NULL NULL NULL +203 val_203 NULL NULL NULL NULL +203 val_203 NULL NULL NULL NULL +205 val_205 NULL NULL NULL NULL +205 val_205 NULL NULL NULL NULL +207 val_207 NULL NULL NULL NULL +207 val_207 NULL NULL NULL NULL +208 val_208 NULL NULL NULL NULL +208 val_208 NULL NULL NULL NULL +208 val_208 NULL NULL NULL NULL +209 val_209 NULL NULL NULL NULL +209 val_209 NULL NULL NULL NULL +213 val_213 NULL NULL NULL NULL +213 val_213 NULL NULL NULL NULL +214 val_214 NULL NULL NULL NULL +216 val_216 NULL NULL NULL NULL +216 val_216 NULL NULL NULL NULL +217 val_217 NULL NULL NULL NULL +217 val_217 NULL NULL NULL NULL +218 val_218 NULL NULL NULL NULL +219 val_219 NULL NULL NULL NULL +219 val_219 NULL NULL NULL NULL +221 val_221 NULL NULL NULL NULL +221 val_221 NULL NULL NULL NULL +222 val_222 NULL NULL NULL NULL +223 val_223 NULL NULL NULL NULL +223 val_223 NULL NULL NULL NULL +224 val_224 NULL NULL NULL NULL +224 val_224 NULL NULL NULL NULL +226 val_226 NULL NULL NULL NULL +228 val_228 NULL NULL NULL NULL +229 val_229 NULL NULL NULL NULL +229 val_229 NULL NULL NULL NULL +230 val_230 NULL NULL NULL NULL +230 val_230 NULL NULL NULL NULL +230 val_230 NULL NULL NULL NULL +230 val_230 NULL NULL NULL NULL +230 val_230 NULL NULL NULL NULL +233 val_233 NULL NULL NULL NULL +233 val_233 NULL NULL NULL NULL +235 val_235 NULL NULL NULL NULL +237 val_237 NULL NULL NULL NULL +237 val_237 NULL NULL NULL NULL +238 val_238 NULL NULL NULL NULL +238 val_238 NULL NULL NULL NULL +239 val_239 NULL NULL NULL NULL +239 val_239 NULL NULL NULL NULL +24 val_24 NULL NULL NULL NULL +24 val_24 NULL NULL NULL NULL +241 val_241 NULL NULL NULL NULL +242 val_242 NULL NULL NULL NULL +242 val_242 NULL NULL NULL NULL +244 val_244 NULL NULL NULL NULL +247 val_247 NULL NULL NULL NULL +248 val_248 NULL NULL NULL NULL +249 val_249 NULL NULL NULL NULL +252 val_252 NULL NULL NULL NULL +255 val_255 NULL NULL NULL NULL +255 val_255 NULL NULL NULL NULL +256 val_256 NULL NULL NULL NULL +256 val_256 NULL NULL NULL NULL +257 val_257 NULL NULL NULL NULL +258 val_258 NULL NULL NULL NULL +26 val_26 NULL NULL NULL NULL +26 val_26 NULL NULL NULL NULL +260 val_260 NULL NULL NULL NULL +262 val_262 NULL NULL NULL NULL +263 val_263 NULL NULL NULL NULL +265 val_265 NULL NULL NULL NULL +265 val_265 NULL NULL NULL NULL +266 val_266 NULL NULL NULL NULL +27 val_27 NULL NULL NULL NULL +272 val_272 NULL NULL NULL NULL +272 val_272 NULL NULL NULL NULL +273 val_273 NULL NULL NULL NULL +273 val_273 NULL NULL NULL NULL +273 val_273 NULL NULL NULL NULL +274 val_274 NULL NULL NULL NULL +275 val_275 NULL NULL NULL NULL +277 val_277 NULL NULL NULL NULL +277 val_277 NULL NULL NULL NULL +277 val_277 NULL NULL NULL NULL +277 val_277 NULL NULL NULL NULL +278 val_278 NULL NULL NULL NULL +278 val_278 NULL NULL NULL NULL +28 val_28 NULL NULL NULL NULL +280 val_280 NULL NULL NULL NULL +280 val_280 NULL NULL NULL NULL +281 val_281 NULL NULL NULL NULL +281 val_281 NULL NULL NULL NULL +282 val_282 NULL NULL NULL NULL +282 val_282 NULL NULL NULL NULL +283 val_283 NULL NULL NULL NULL +284 val_284 NULL NULL NULL NULL +285 val_285 NULL NULL NULL NULL +286 val_286 NULL NULL NULL NULL +287 val_287 NULL NULL NULL NULL +288 val_288 NULL NULL NULL NULL +288 val_288 NULL NULL NULL 
NULL +289 val_289 NULL NULL NULL NULL +291 val_291 NULL NULL NULL NULL +292 val_292 NULL NULL NULL NULL +296 val_296 NULL NULL NULL NULL +298 val_298 NULL NULL NULL NULL +298 val_298 NULL NULL NULL NULL +298 val_298 NULL NULL NULL NULL +30 val_30 NULL NULL NULL NULL +302 val_302 NULL NULL NULL NULL +305 val_305 NULL NULL NULL NULL +306 val_306 NULL NULL NULL NULL +307 val_307 NULL NULL NULL NULL +307 val_307 NULL NULL NULL NULL +308 val_308 NULL NULL NULL NULL +309 val_309 NULL NULL NULL NULL +309 val_309 NULL NULL NULL NULL +310 val_310 NULL NULL NULL NULL +311 val_311 NULL NULL NULL NULL +311 val_311 NULL NULL NULL NULL +311 val_311 NULL NULL NULL NULL +315 val_315 NULL NULL NULL NULL +316 val_316 NULL NULL NULL NULL +316 val_316 NULL NULL NULL NULL +316 val_316 NULL NULL NULL NULL +317 val_317 NULL NULL NULL NULL +317 val_317 NULL NULL NULL NULL +318 val_318 NULL NULL NULL NULL +318 val_318 NULL NULL NULL NULL +318 val_318 NULL NULL NULL NULL +321 val_321 NULL NULL NULL NULL +321 val_321 NULL NULL NULL NULL +322 val_322 NULL NULL NULL NULL +322 val_322 NULL NULL NULL NULL +323 val_323 NULL NULL NULL NULL +325 val_325 NULL NULL NULL NULL +325 val_325 NULL NULL NULL NULL +327 val_327 NULL NULL NULL NULL +327 val_327 NULL NULL NULL NULL +327 val_327 NULL NULL NULL NULL +33 val_33 NULL NULL NULL NULL +331 val_331 NULL NULL NULL NULL +331 val_331 NULL NULL NULL NULL +332 val_332 NULL NULL NULL NULL +333 val_333 NULL NULL NULL NULL +333 val_333 NULL NULL NULL NULL +335 val_335 NULL NULL NULL NULL +336 val_336 NULL NULL NULL NULL +338 val_338 NULL NULL NULL NULL +339 val_339 NULL NULL NULL NULL +34 val_34 NULL NULL NULL NULL +341 val_341 NULL NULL NULL NULL +342 val_342 NULL NULL NULL NULL +342 val_342 NULL NULL NULL NULL +344 val_344 NULL NULL NULL NULL +344 val_344 NULL NULL NULL NULL +345 val_345 NULL NULL NULL NULL +348 val_348 NULL NULL NULL NULL +348 val_348 NULL NULL NULL NULL +348 val_348 NULL NULL NULL NULL +348 val_348 NULL NULL NULL NULL +348 val_348 NULL NULL NULL NULL +35 val_35 NULL NULL NULL NULL +35 val_35 NULL NULL NULL NULL +35 val_35 NULL NULL NULL NULL +351 val_351 NULL NULL NULL NULL +353 val_353 NULL NULL NULL NULL +353 val_353 NULL NULL NULL NULL +356 val_356 NULL NULL NULL NULL +360 val_360 NULL NULL NULL NULL +362 val_362 NULL NULL NULL NULL +364 val_364 NULL NULL NULL NULL +365 val_365 NULL NULL NULL NULL +366 val_366 NULL NULL NULL NULL +367 val_367 NULL NULL NULL NULL +367 val_367 NULL NULL NULL NULL +368 val_368 NULL NULL NULL NULL +369 val_369 NULL NULL NULL NULL +369 val_369 NULL NULL NULL NULL +369 val_369 NULL NULL NULL NULL +37 val_37 NULL NULL NULL NULL +37 val_37 NULL NULL NULL NULL +373 val_373 NULL NULL NULL NULL +374 val_374 NULL NULL NULL NULL +375 val_375 NULL NULL NULL NULL +377 val_377 NULL NULL NULL NULL +378 val_378 NULL NULL NULL NULL +379 val_379 NULL NULL NULL NULL +382 val_382 NULL NULL NULL NULL +382 val_382 NULL NULL NULL NULL +384 val_384 NULL NULL NULL NULL +384 val_384 NULL NULL NULL NULL +384 val_384 NULL NULL NULL NULL +386 val_386 NULL NULL NULL NULL +389 val_389 NULL NULL NULL NULL +392 val_392 NULL NULL NULL NULL +393 val_393 NULL NULL NULL NULL +394 val_394 NULL NULL NULL NULL +395 val_395 NULL NULL NULL NULL +395 val_395 NULL NULL NULL NULL +396 val_396 NULL NULL NULL NULL +396 val_396 NULL NULL NULL NULL +396 val_396 NULL NULL NULL NULL +397 val_397 NULL NULL NULL NULL +397 val_397 NULL NULL NULL NULL +399 val_399 NULL NULL NULL NULL +399 val_399 NULL NULL NULL NULL +4 val_4 NULL NULL NULL NULL +400 val_400 NULL NULL NULL NULL +401 
val_401 NULL NULL NULL NULL +401 val_401 NULL NULL NULL NULL +401 val_401 NULL NULL NULL NULL +401 val_401 NULL NULL NULL NULL +401 val_401 NULL NULL NULL NULL +402 val_402 NULL NULL NULL NULL +403 val_403 NULL NULL NULL NULL +403 val_403 NULL NULL NULL NULL +403 val_403 NULL NULL NULL NULL +404 val_404 NULL NULL NULL NULL +404 val_404 NULL NULL NULL NULL +406 val_406 NULL NULL NULL NULL +406 val_406 NULL NULL NULL NULL +406 val_406 NULL NULL NULL NULL +406 val_406 NULL NULL NULL NULL +407 val_407 NULL NULL NULL NULL +409 val_409 NULL NULL NULL NULL +409 val_409 NULL NULL NULL NULL +409 val_409 NULL NULL NULL NULL +41 val_41 NULL NULL NULL NULL +411 val_411 NULL NULL NULL NULL +413 val_413 NULL NULL NULL NULL +413 val_413 NULL NULL NULL NULL +414 val_414 NULL NULL NULL NULL +414 val_414 NULL NULL NULL NULL +417 val_417 NULL NULL NULL NULL +417 val_417 NULL NULL NULL NULL +417 val_417 NULL NULL NULL NULL +418 val_418 NULL NULL NULL NULL +419 val_419 NULL NULL NULL NULL +42 val_42 NULL NULL NULL NULL +42 val_42 NULL NULL NULL NULL +421 val_421 NULL NULL NULL NULL +424 val_424 NULL NULL NULL NULL +424 val_424 NULL NULL NULL NULL +427 val_427 NULL NULL NULL NULL +429 val_429 NULL NULL NULL NULL +429 val_429 NULL NULL NULL NULL +43 val_43 NULL NULL NULL NULL +430 val_430 NULL NULL NULL NULL +430 val_430 NULL NULL NULL NULL +430 val_430 NULL NULL NULL NULL +431 val_431 NULL NULL NULL NULL +431 val_431 NULL NULL NULL NULL +431 val_431 NULL NULL NULL NULL +432 val_432 NULL NULL NULL NULL +435 val_435 NULL NULL NULL NULL +436 val_436 NULL NULL NULL NULL +437 val_437 NULL NULL NULL NULL +438 val_438 NULL NULL NULL NULL +438 val_438 NULL NULL NULL NULL +438 val_438 NULL NULL NULL NULL +439 val_439 NULL NULL NULL NULL +439 val_439 NULL NULL NULL NULL +44 val_44 NULL NULL NULL NULL +443 val_443 NULL NULL NULL NULL +444 val_444 NULL NULL NULL NULL +446 val_446 NULL NULL NULL NULL +448 val_448 NULL NULL NULL NULL +449 val_449 NULL NULL NULL NULL +452 val_452 NULL NULL NULL NULL +453 val_453 NULL NULL NULL NULL +454 val_454 NULL NULL NULL NULL +454 val_454 NULL NULL NULL NULL +454 val_454 NULL NULL NULL NULL +455 val_455 NULL NULL NULL NULL +457 val_457 NULL NULL NULL NULL +458 val_458 NULL NULL NULL NULL +458 val_458 NULL NULL NULL NULL +459 val_459 NULL NULL NULL NULL +459 val_459 NULL NULL NULL NULL +460 val_460 NULL NULL NULL NULL +462 val_462 NULL NULL NULL NULL +462 val_462 NULL NULL NULL NULL +463 val_463 NULL NULL NULL NULL +463 val_463 NULL NULL NULL NULL +466 val_466 NULL NULL NULL NULL +466 val_466 NULL NULL NULL NULL +466 val_466 NULL NULL NULL NULL +467 val_467 NULL NULL NULL NULL +468 val_468 NULL NULL NULL NULL +468 val_468 NULL NULL NULL NULL +468 val_468 NULL NULL NULL NULL +468 val_468 NULL NULL NULL NULL +469 val_469 NULL NULL NULL NULL +469 val_469 NULL NULL NULL NULL +469 val_469 NULL NULL NULL NULL +469 val_469 NULL NULL NULL NULL +469 val_469 NULL NULL NULL NULL +47 val_47 NULL NULL NULL NULL +470 val_470 NULL NULL NULL NULL +472 val_472 NULL NULL NULL NULL +475 val_475 NULL NULL NULL NULL +477 val_477 NULL NULL NULL NULL +478 val_478 NULL NULL NULL NULL +478 val_478 NULL NULL NULL NULL +479 val_479 NULL NULL NULL NULL +480 val_480 NULL NULL NULL NULL +480 val_480 NULL NULL NULL NULL +480 val_480 NULL NULL NULL NULL +481 val_481 NULL NULL NULL NULL +482 val_482 NULL NULL NULL NULL +483 val_483 NULL NULL NULL NULL +484 val_484 NULL NULL NULL NULL +485 val_485 NULL NULL NULL NULL +487 val_487 NULL NULL NULL NULL +489 val_489 NULL NULL NULL NULL +489 val_489 NULL NULL NULL NULL +489 
val_489 NULL NULL NULL NULL +489 val_489 NULL NULL NULL NULL +490 val_490 NULL NULL NULL NULL +491 val_491 NULL NULL NULL NULL +492 val_492 NULL NULL NULL NULL +492 val_492 NULL NULL NULL NULL +493 val_493 NULL NULL NULL NULL +494 val_494 NULL NULL NULL NULL +495 val_495 NULL NULL NULL NULL +496 val_496 NULL NULL NULL NULL +497 val_497 NULL NULL NULL NULL +498 val_498 NULL NULL NULL NULL +498 val_498 NULL NULL NULL NULL +498 val_498 NULL NULL NULL NULL +5 val_5 NULL NULL NULL NULL +5 val_5 NULL NULL NULL NULL +5 val_5 NULL NULL NULL NULL +51 val_51 NULL NULL NULL NULL +51 val_51 NULL NULL NULL NULL +53 val_53 NULL NULL NULL NULL +54 val_54 NULL NULL NULL NULL +57 val_57 NULL NULL NULL NULL +58 val_58 NULL NULL NULL NULL +58 val_58 NULL NULL NULL NULL +64 val_64 NULL NULL NULL NULL +65 val_65 NULL NULL NULL NULL +66 val_66 NULL NULL NULL NULL +67 val_67 NULL NULL NULL NULL +67 val_67 NULL NULL NULL NULL +69 val_69 NULL NULL NULL NULL +70 val_70 NULL NULL NULL NULL +70 val_70 NULL NULL NULL NULL +70 val_70 NULL NULL NULL NULL +72 val_72 NULL NULL NULL NULL +72 val_72 NULL NULL NULL NULL +74 val_74 NULL NULL NULL NULL +76 val_76 NULL NULL NULL NULL +76 val_76 NULL NULL NULL NULL +77 val_77 NULL NULL NULL NULL +78 val_78 NULL NULL NULL NULL +8 val_8 NULL NULL NULL NULL +80 val_80 NULL NULL NULL NULL +82 val_82 NULL NULL NULL NULL +83 val_83 NULL NULL NULL NULL +83 val_83 NULL NULL NULL NULL +84 val_84 NULL NULL NULL NULL +84 val_84 NULL NULL NULL NULL +85 val_85 NULL NULL NULL NULL +86 val_86 NULL NULL NULL NULL +87 val_87 NULL NULL NULL NULL +9 val_9 NULL NULL NULL NULL +90 val_90 NULL NULL NULL NULL +90 val_90 NULL NULL NULL NULL +90 val_90 NULL NULL NULL NULL +92 val_92 NULL NULL NULL NULL +95 val_95 NULL NULL NULL NULL +95 val_95 NULL NULL NULL NULL +96 val_96 NULL NULL NULL NULL +97 val_97 NULL NULL NULL NULL +97 val_97 NULL NULL NULL NULL +98 val_98 NULL NULL NULL NULL +98 val_98 NULL NULL NULL NULL +PREHOOK: query: explain +SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE 
Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: key (type: string)
+                          sort order: +
+                          Map-reduce partition columns: key (type: string)
+                          Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: value (type: string)
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (key < 10) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: string)
+                      sort order: +
+                      Map-reduce partition columns: key (type: string)
+                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: value (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Right Outer Join0 to 1
+                     Left Outer Join1 to 2
+                condition expressions:
+                  0 {KEY.reducesinkkey0} {VALUE._col0}
+                  1 {KEY.reducesinkkey0} {VALUE._col0}
+                  2 {KEY.reducesinkkey0} {VALUE._col0}
+                filter predicates:
+                  0 
+                  1 {(KEY.reducesinkkey0 > 10)}
+                  2 
+                outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
+                Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+                    sort order: ++++++
+                    Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL NULL 0 val_0 0 val_0
+NULL NULL 0 val_0 0 val_0
+NULL NULL 0 val_0 0 val_0
+NULL NULL 0 val_0 0 val_0
+NULL 
NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 10 val_10 NULL NULL +NULL NULL 100 val_100 NULL NULL +NULL NULL 100 val_100 NULL NULL +NULL NULL 103 val_103 NULL NULL +NULL NULL 103 val_103 NULL NULL +NULL NULL 104 val_104 NULL NULL +NULL NULL 104 val_104 NULL NULL +NULL NULL 105 val_105 NULL NULL +NULL NULL 11 val_11 NULL NULL +NULL NULL 111 val_111 NULL NULL +NULL NULL 113 val_113 NULL NULL +NULL NULL 113 val_113 NULL NULL +NULL NULL 114 val_114 NULL NULL +NULL NULL 116 val_116 NULL NULL +NULL NULL 118 val_118 NULL NULL +NULL NULL 118 val_118 NULL NULL +NULL NULL 119 val_119 NULL NULL +NULL NULL 119 val_119 NULL NULL +NULL NULL 119 val_119 NULL NULL +NULL NULL 12 val_12 NULL NULL +NULL NULL 12 val_12 NULL NULL +NULL NULL 120 val_120 NULL NULL +NULL NULL 120 val_120 NULL NULL +NULL NULL 125 val_125 NULL NULL +NULL NULL 125 val_125 NULL NULL +NULL NULL 126 val_126 NULL NULL +NULL NULL 128 val_128 NULL NULL +NULL NULL 128 val_128 NULL NULL +NULL NULL 128 val_128 NULL NULL +NULL NULL 129 val_129 NULL NULL +NULL NULL 129 val_129 NULL NULL +NULL NULL 131 val_131 NULL NULL +NULL NULL 133 val_133 NULL NULL +NULL NULL 134 val_134 NULL NULL +NULL NULL 134 val_134 NULL NULL +NULL NULL 136 val_136 NULL NULL +NULL NULL 137 val_137 NULL NULL +NULL NULL 137 val_137 NULL NULL +NULL NULL 138 val_138 NULL NULL +NULL NULL 138 val_138 NULL NULL +NULL NULL 138 val_138 NULL NULL +NULL NULL 138 val_138 NULL NULL +NULL NULL 143 val_143 NULL NULL +NULL NULL 145 val_145 NULL NULL +NULL NULL 146 val_146 NULL NULL +NULL NULL 146 val_146 NULL NULL +NULL NULL 149 val_149 NULL NULL +NULL NULL 149 val_149 NULL NULL +NULL NULL 15 val_15 NULL NULL +NULL NULL 15 val_15 NULL NULL +NULL NULL 150 val_150 NULL NULL +NULL NULL 152 val_152 NULL NULL +NULL NULL 152 val_152 NULL NULL +NULL NULL 153 val_153 NULL NULL +NULL NULL 155 val_155 NULL NULL +NULL NULL 156 val_156 NULL NULL +NULL NULL 157 val_157 NULL NULL +NULL NULL 158 val_158 NULL NULL +NULL NULL 160 val_160 NULL NULL +NULL NULL 162 val_162 NULL NULL +NULL NULL 163 val_163 NULL NULL +NULL NULL 164 val_164 NULL NULL +NULL NULL 164 val_164 NULL NULL +NULL NULL 165 val_165 NULL NULL +NULL NULL 165 val_165 NULL NULL +NULL NULL 166 val_166 NULL NULL +NULL NULL 167 val_167 NULL NULL +NULL NULL 167 val_167 NULL NULL +NULL NULL 167 val_167 NULL NULL +NULL NULL 168 val_168 NULL NULL +NULL NULL 169 val_169 NULL NULL +NULL NULL 169 val_169 NULL NULL +NULL NULL 169 val_169 NULL NULL +NULL NULL 169 val_169 NULL NULL +NULL NULL 17 val_17 NULL NULL +NULL NULL 170 val_170 NULL NULL +NULL NULL 172 val_172 NULL NULL +NULL NULL 172 val_172 NULL NULL +NULL NULL 174 val_174 NULL NULL +NULL NULL 174 val_174 NULL NULL +NULL NULL 175 val_175 NULL NULL +NULL NULL 175 val_175 NULL NULL +NULL NULL 176 val_176 NULL NULL +NULL NULL 176 val_176 NULL NULL +NULL NULL 177 val_177 NULL NULL +NULL NULL 178 val_178 NULL NULL +NULL NULL 179 val_179 NULL NULL +NULL NULL 179 val_179 NULL NULL +NULL NULL 18 val_18 NULL NULL +NULL NULL 18 val_18 NULL NULL +NULL NULL 180 val_180 NULL NULL +NULL NULL 181 val_181 NULL NULL +NULL NULL 183 val_183 NULL NULL +NULL NULL 186 val_186 NULL NULL +NULL NULL 187 val_187 NULL NULL +NULL NULL 187 val_187 NULL NULL +NULL NULL 187 val_187 NULL NULL +NULL NULL 189 val_189 NULL NULL +NULL NULL 19 val_19 NULL NULL +NULL NULL 190 val_190 NULL NULL +NULL NULL 191 val_191 NULL NULL +NULL NULL 191 val_191 NULL NULL +NULL NULL 192 val_192 NULL NULL +NULL NULL 193 val_193 NULL NULL +NULL NULL 
193 val_193 NULL NULL +NULL NULL 193 val_193 NULL NULL +NULL NULL 194 val_194 NULL NULL +NULL NULL 195 val_195 NULL NULL +NULL NULL 195 val_195 NULL NULL +NULL NULL 196 val_196 NULL NULL +NULL NULL 197 val_197 NULL NULL +NULL NULL 197 val_197 NULL NULL +NULL NULL 199 val_199 NULL NULL +NULL NULL 199 val_199 NULL NULL +NULL NULL 199 val_199 NULL NULL +NULL NULL 2 val_2 2 val_2 +NULL NULL 20 val_20 NULL NULL +NULL NULL 200 val_200 NULL NULL +NULL NULL 200 val_200 NULL NULL +NULL NULL 201 val_201 NULL NULL +NULL NULL 202 val_202 NULL NULL +NULL NULL 203 val_203 NULL NULL +NULL NULL 203 val_203 NULL NULL +NULL NULL 205 val_205 NULL NULL +NULL NULL 205 val_205 NULL NULL +NULL NULL 207 val_207 NULL NULL +NULL NULL 207 val_207 NULL NULL +NULL NULL 208 val_208 NULL NULL +NULL NULL 208 val_208 NULL NULL +NULL NULL 208 val_208 NULL NULL +NULL NULL 209 val_209 NULL NULL +NULL NULL 209 val_209 NULL NULL +NULL NULL 213 val_213 NULL NULL +NULL NULL 213 val_213 NULL NULL +NULL NULL 214 val_214 NULL NULL +NULL NULL 216 val_216 NULL NULL +NULL NULL 216 val_216 NULL NULL +NULL NULL 217 val_217 NULL NULL +NULL NULL 217 val_217 NULL NULL +NULL NULL 218 val_218 NULL NULL +NULL NULL 219 val_219 NULL NULL +NULL NULL 219 val_219 NULL NULL +NULL NULL 221 val_221 NULL NULL +NULL NULL 221 val_221 NULL NULL +NULL NULL 222 val_222 NULL NULL +NULL NULL 223 val_223 NULL NULL +NULL NULL 223 val_223 NULL NULL +NULL NULL 224 val_224 NULL NULL +NULL NULL 224 val_224 NULL NULL +NULL NULL 226 val_226 NULL NULL +NULL NULL 228 val_228 NULL NULL +NULL NULL 229 val_229 NULL NULL +NULL NULL 229 val_229 NULL NULL +NULL NULL 230 val_230 NULL NULL +NULL NULL 230 val_230 NULL NULL +NULL NULL 230 val_230 NULL NULL +NULL NULL 230 val_230 NULL NULL +NULL NULL 230 val_230 NULL NULL +NULL NULL 233 val_233 NULL NULL +NULL NULL 233 val_233 NULL NULL +NULL NULL 235 val_235 NULL NULL +NULL NULL 237 val_237 NULL NULL +NULL NULL 237 val_237 NULL NULL +NULL NULL 238 val_238 NULL NULL +NULL NULL 238 val_238 NULL NULL +NULL NULL 239 val_239 NULL NULL +NULL NULL 239 val_239 NULL NULL +NULL NULL 24 val_24 NULL NULL +NULL NULL 24 val_24 NULL NULL +NULL NULL 241 val_241 NULL NULL +NULL NULL 242 val_242 NULL NULL +NULL NULL 242 val_242 NULL NULL +NULL NULL 244 val_244 NULL NULL +NULL NULL 247 val_247 NULL NULL +NULL NULL 248 val_248 NULL NULL +NULL NULL 249 val_249 NULL NULL +NULL NULL 252 val_252 NULL NULL +NULL NULL 255 val_255 NULL NULL +NULL NULL 255 val_255 NULL NULL +NULL NULL 256 val_256 NULL NULL +NULL NULL 256 val_256 NULL NULL +NULL NULL 257 val_257 NULL NULL +NULL NULL 258 val_258 NULL NULL +NULL NULL 26 val_26 NULL NULL +NULL NULL 26 val_26 NULL NULL +NULL NULL 260 val_260 NULL NULL +NULL NULL 262 val_262 NULL NULL +NULL NULL 263 val_263 NULL NULL +NULL NULL 265 val_265 NULL NULL +NULL NULL 265 val_265 NULL NULL +NULL NULL 266 val_266 NULL NULL +NULL NULL 27 val_27 NULL NULL +NULL NULL 272 val_272 NULL NULL +NULL NULL 272 val_272 NULL NULL +NULL NULL 273 val_273 NULL NULL +NULL NULL 273 val_273 NULL NULL +NULL NULL 273 val_273 NULL NULL +NULL NULL 274 val_274 NULL NULL +NULL NULL 275 val_275 NULL NULL +NULL NULL 277 val_277 NULL NULL +NULL NULL 277 val_277 NULL NULL +NULL NULL 277 val_277 NULL NULL +NULL NULL 277 val_277 NULL NULL +NULL NULL 278 val_278 NULL NULL +NULL NULL 278 val_278 NULL NULL +NULL NULL 28 val_28 NULL NULL +NULL NULL 280 val_280 NULL NULL +NULL NULL 280 val_280 NULL NULL +NULL NULL 281 val_281 NULL NULL +NULL NULL 281 val_281 NULL NULL +NULL NULL 282 val_282 NULL NULL +NULL NULL 282 val_282 NULL NULL +NULL NULL 283 
val_283 NULL NULL +NULL NULL 284 val_284 NULL NULL +NULL NULL 285 val_285 NULL NULL +NULL NULL 286 val_286 NULL NULL +NULL NULL 287 val_287 NULL NULL +NULL NULL 288 val_288 NULL NULL +NULL NULL 288 val_288 NULL NULL +NULL NULL 289 val_289 NULL NULL +NULL NULL 291 val_291 NULL NULL +NULL NULL 292 val_292 NULL NULL +NULL NULL 296 val_296 NULL NULL +NULL NULL 298 val_298 NULL NULL +NULL NULL 298 val_298 NULL NULL +NULL NULL 298 val_298 NULL NULL +NULL NULL 30 val_30 NULL NULL +NULL NULL 302 val_302 NULL NULL +NULL NULL 305 val_305 NULL NULL +NULL NULL 306 val_306 NULL NULL +NULL NULL 307 val_307 NULL NULL +NULL NULL 307 val_307 NULL NULL +NULL NULL 308 val_308 NULL NULL +NULL NULL 309 val_309 NULL NULL +NULL NULL 309 val_309 NULL NULL +NULL NULL 310 val_310 NULL NULL +NULL NULL 311 val_311 NULL NULL +NULL NULL 311 val_311 NULL NULL +NULL NULL 311 val_311 NULL NULL +NULL NULL 315 val_315 NULL NULL +NULL NULL 316 val_316 NULL NULL +NULL NULL 316 val_316 NULL NULL +NULL NULL 316 val_316 NULL NULL +NULL NULL 317 val_317 NULL NULL +NULL NULL 317 val_317 NULL NULL +NULL NULL 318 val_318 NULL NULL +NULL NULL 318 val_318 NULL NULL +NULL NULL 318 val_318 NULL NULL +NULL NULL 321 val_321 NULL NULL +NULL NULL 321 val_321 NULL NULL +NULL NULL 322 val_322 NULL NULL +NULL NULL 322 val_322 NULL NULL +NULL NULL 323 val_323 NULL NULL +NULL NULL 325 val_325 NULL NULL +NULL NULL 325 val_325 NULL NULL +NULL NULL 327 val_327 NULL NULL +NULL NULL 327 val_327 NULL NULL +NULL NULL 327 val_327 NULL NULL +NULL NULL 33 val_33 NULL NULL +NULL NULL 331 val_331 NULL NULL +NULL NULL 331 val_331 NULL NULL +NULL NULL 332 val_332 NULL NULL +NULL NULL 333 val_333 NULL NULL +NULL NULL 333 val_333 NULL NULL +NULL NULL 335 val_335 NULL NULL +NULL NULL 336 val_336 NULL NULL +NULL NULL 338 val_338 NULL NULL +NULL NULL 339 val_339 NULL NULL +NULL NULL 34 val_34 NULL NULL +NULL NULL 341 val_341 NULL NULL +NULL NULL 342 val_342 NULL NULL +NULL NULL 342 val_342 NULL NULL +NULL NULL 344 val_344 NULL NULL +NULL NULL 344 val_344 NULL NULL +NULL NULL 345 val_345 NULL NULL +NULL NULL 348 val_348 NULL NULL +NULL NULL 348 val_348 NULL NULL +NULL NULL 348 val_348 NULL NULL +NULL NULL 348 val_348 NULL NULL +NULL NULL 348 val_348 NULL NULL +NULL NULL 35 val_35 NULL NULL +NULL NULL 35 val_35 NULL NULL +NULL NULL 35 val_35 NULL NULL +NULL NULL 351 val_351 NULL NULL +NULL NULL 353 val_353 NULL NULL +NULL NULL 353 val_353 NULL NULL +NULL NULL 356 val_356 NULL NULL +NULL NULL 360 val_360 NULL NULL +NULL NULL 362 val_362 NULL NULL +NULL NULL 364 val_364 NULL NULL +NULL NULL 365 val_365 NULL NULL +NULL NULL 366 val_366 NULL NULL +NULL NULL 367 val_367 NULL NULL +NULL NULL 367 val_367 NULL NULL +NULL NULL 368 val_368 NULL NULL +NULL NULL 369 val_369 NULL NULL +NULL NULL 369 val_369 NULL NULL +NULL NULL 369 val_369 NULL NULL +NULL NULL 37 val_37 NULL NULL +NULL NULL 37 val_37 NULL NULL +NULL NULL 373 val_373 NULL NULL +NULL NULL 374 val_374 NULL NULL +NULL NULL 375 val_375 NULL NULL +NULL NULL 377 val_377 NULL NULL +NULL NULL 378 val_378 NULL NULL +NULL NULL 379 val_379 NULL NULL +NULL NULL 382 val_382 NULL NULL +NULL NULL 382 val_382 NULL NULL +NULL NULL 384 val_384 NULL NULL +NULL NULL 384 val_384 NULL NULL +NULL NULL 384 val_384 NULL NULL +NULL NULL 386 val_386 NULL NULL +NULL NULL 389 val_389 NULL NULL +NULL NULL 392 val_392 NULL NULL +NULL NULL 393 val_393 NULL NULL +NULL NULL 394 val_394 NULL NULL +NULL NULL 395 val_395 NULL NULL +NULL NULL 395 val_395 NULL NULL +NULL NULL 396 val_396 NULL NULL +NULL NULL 396 val_396 NULL NULL +NULL NULL 396 
val_396 NULL NULL +NULL NULL 397 val_397 NULL NULL +NULL NULL 397 val_397 NULL NULL +NULL NULL 399 val_399 NULL NULL +NULL NULL 399 val_399 NULL NULL +NULL NULL 4 val_4 4 val_4 +NULL NULL 400 val_400 NULL NULL +NULL NULL 401 val_401 NULL NULL +NULL NULL 401 val_401 NULL NULL +NULL NULL 401 val_401 NULL NULL +NULL NULL 401 val_401 NULL NULL +NULL NULL 401 val_401 NULL NULL +NULL NULL 402 val_402 NULL NULL +NULL NULL 403 val_403 NULL NULL +NULL NULL 403 val_403 NULL NULL +NULL NULL 403 val_403 NULL NULL +NULL NULL 404 val_404 NULL NULL +NULL NULL 404 val_404 NULL NULL +NULL NULL 406 val_406 NULL NULL +NULL NULL 406 val_406 NULL NULL +NULL NULL 406 val_406 NULL NULL +NULL NULL 406 val_406 NULL NULL +NULL NULL 407 val_407 NULL NULL +NULL NULL 409 val_409 NULL NULL +NULL NULL 409 val_409 NULL NULL +NULL NULL 409 val_409 NULL NULL +NULL NULL 41 val_41 NULL NULL +NULL NULL 411 val_411 NULL NULL +NULL NULL 413 val_413 NULL NULL +NULL NULL 413 val_413 NULL NULL +NULL NULL 414 val_414 NULL NULL +NULL NULL 414 val_414 NULL NULL +NULL NULL 417 val_417 NULL NULL +NULL NULL 417 val_417 NULL NULL +NULL NULL 417 val_417 NULL NULL +NULL NULL 418 val_418 NULL NULL +NULL NULL 419 val_419 NULL NULL +NULL NULL 42 val_42 NULL NULL +NULL NULL 42 val_42 NULL NULL +NULL NULL 421 val_421 NULL NULL +NULL NULL 424 val_424 NULL NULL +NULL NULL 424 val_424 NULL NULL +NULL NULL 427 val_427 NULL NULL +NULL NULL 429 val_429 NULL NULL +NULL NULL 429 val_429 NULL NULL +NULL NULL 43 val_43 NULL NULL +NULL NULL 430 val_430 NULL NULL +NULL NULL 430 val_430 NULL NULL +NULL NULL 430 val_430 NULL NULL +NULL NULL 431 val_431 NULL NULL +NULL NULL 431 val_431 NULL NULL +NULL NULL 431 val_431 NULL NULL +NULL NULL 432 val_432 NULL NULL +NULL NULL 435 val_435 NULL NULL +NULL NULL 436 val_436 NULL NULL +NULL NULL 437 val_437 NULL NULL +NULL NULL 438 val_438 NULL NULL +NULL NULL 438 val_438 NULL NULL +NULL NULL 438 val_438 NULL NULL +NULL NULL 439 val_439 NULL NULL +NULL NULL 439 val_439 NULL NULL +NULL NULL 44 val_44 NULL NULL +NULL NULL 443 val_443 NULL NULL +NULL NULL 444 val_444 NULL NULL +NULL NULL 446 val_446 NULL NULL +NULL NULL 448 val_448 NULL NULL +NULL NULL 449 val_449 NULL NULL +NULL NULL 452 val_452 NULL NULL +NULL NULL 453 val_453 NULL NULL +NULL NULL 454 val_454 NULL NULL +NULL NULL 454 val_454 NULL NULL +NULL NULL 454 val_454 NULL NULL +NULL NULL 455 val_455 NULL NULL +NULL NULL 457 val_457 NULL NULL +NULL NULL 458 val_458 NULL NULL +NULL NULL 458 val_458 NULL NULL +NULL NULL 459 val_459 NULL NULL +NULL NULL 459 val_459 NULL NULL +NULL NULL 460 val_460 NULL NULL +NULL NULL 462 val_462 NULL NULL +NULL NULL 462 val_462 NULL NULL +NULL NULL 463 val_463 NULL NULL +NULL NULL 463 val_463 NULL NULL +NULL NULL 466 val_466 NULL NULL +NULL NULL 466 val_466 NULL NULL +NULL NULL 466 val_466 NULL NULL +NULL NULL 467 val_467 NULL NULL +NULL NULL 468 val_468 NULL NULL +NULL NULL 468 val_468 NULL NULL +NULL NULL 468 val_468 NULL NULL +NULL NULL 468 val_468 NULL NULL +NULL NULL 469 val_469 NULL NULL +NULL NULL 469 val_469 NULL NULL +NULL NULL 469 val_469 NULL NULL +NULL NULL 469 val_469 NULL NULL +NULL NULL 469 val_469 NULL NULL +NULL NULL 47 val_47 NULL NULL +NULL NULL 470 val_470 NULL NULL +NULL NULL 472 val_472 NULL NULL +NULL NULL 475 val_475 NULL NULL +NULL NULL 477 val_477 NULL NULL +NULL NULL 478 val_478 NULL NULL +NULL NULL 478 val_478 NULL NULL +NULL NULL 479 val_479 NULL NULL +NULL NULL 480 val_480 NULL NULL +NULL NULL 480 val_480 NULL NULL +NULL NULL 480 val_480 NULL NULL +NULL NULL 481 val_481 NULL NULL +NULL NULL 482 val_482 
NULL NULL +NULL NULL 483 val_483 NULL NULL +NULL NULL 484 val_484 NULL NULL +NULL NULL 485 val_485 NULL NULL +NULL NULL 487 val_487 NULL NULL +NULL NULL 489 val_489 NULL NULL +NULL NULL 489 val_489 NULL NULL +NULL NULL 489 val_489 NULL NULL +NULL NULL 489 val_489 NULL NULL +NULL NULL 490 val_490 NULL NULL +NULL NULL 491 val_491 NULL NULL +NULL NULL 492 val_492 NULL NULL +NULL NULL 492 val_492 NULL NULL +NULL NULL 493 val_493 NULL NULL +NULL NULL 494 val_494 NULL NULL +NULL NULL 495 val_495 NULL NULL +NULL NULL 496 val_496 NULL NULL +NULL NULL 497 val_497 NULL NULL +NULL NULL 498 val_498 NULL NULL +NULL NULL 498 val_498 NULL NULL +NULL NULL 498 val_498 NULL NULL +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 51 val_51 NULL NULL +NULL NULL 51 val_51 NULL NULL +NULL NULL 53 val_53 NULL NULL +NULL NULL 54 val_54 NULL NULL +NULL NULL 57 val_57 NULL NULL +NULL NULL 58 val_58 NULL NULL +NULL NULL 58 val_58 NULL NULL +NULL NULL 64 val_64 NULL NULL +NULL NULL 65 val_65 NULL NULL +NULL NULL 66 val_66 NULL NULL +NULL NULL 67 val_67 NULL NULL +NULL NULL 67 val_67 NULL NULL +NULL NULL 69 val_69 NULL NULL +NULL NULL 70 val_70 NULL NULL +NULL NULL 70 val_70 NULL NULL +NULL NULL 70 val_70 NULL NULL +NULL NULL 72 val_72 NULL NULL +NULL NULL 72 val_72 NULL NULL +NULL NULL 74 val_74 NULL NULL +NULL NULL 76 val_76 NULL NULL +NULL NULL 76 val_76 NULL NULL +NULL NULL 77 val_77 NULL NULL +NULL NULL 78 val_78 NULL NULL +NULL NULL 8 val_8 8 val_8 +NULL NULL 80 val_80 NULL NULL +NULL NULL 82 val_82 NULL NULL +NULL NULL 83 val_83 NULL NULL +NULL NULL 83 val_83 NULL NULL +NULL NULL 84 val_84 NULL NULL +NULL NULL 84 val_84 NULL NULL +NULL NULL 85 val_85 NULL NULL +NULL NULL 86 val_86 NULL NULL +NULL NULL 87 val_87 NULL NULL +NULL NULL 9 val_9 9 val_9 +NULL NULL 90 val_90 NULL NULL +NULL NULL 90 val_90 NULL NULL +NULL NULL 90 val_90 NULL NULL +NULL NULL 92 val_92 NULL NULL +NULL NULL 95 val_95 NULL NULL +NULL NULL 95 val_95 NULL NULL +NULL NULL 96 val_96 NULL NULL +NULL NULL 97 val_97 NULL NULL +NULL NULL 97 val_97 NULL NULL +NULL NULL 98 val_98 NULL NULL +NULL NULL 98 val_98 NULL NULL +PREHOOK: query: explain +SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 
Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: value (type: string)
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: src3
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: key (type: string)
+                    sort order: +
+                    Map-reduce partition columns: key (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: value (type: string)
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (key < 10) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: string)
+                      sort order: +
+                      Map-reduce partition columns: key (type: string)
+                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: value (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Right Outer Join0 to 1
+                     Right Outer Join1 to 2
+                condition expressions:
+                  0 {KEY.reducesinkkey0} {VALUE._col0}
+                  1 {KEY.reducesinkkey0} {VALUE._col0}
+                  2 {KEY.reducesinkkey0} {VALUE._col0}
+                filter predicates:
+                  0 
+                  1 {(KEY.reducesinkkey0 > 10)}
+                  2 {(KEY.reducesinkkey0 < 10)}
+                outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
+                Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+                    sort order: ++++++
+                    Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, 
src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL NULL NULL NULL 10 val_10 +NULL NULL NULL NULL 100 val_100 +NULL NULL NULL NULL 100 val_100 +NULL NULL NULL NULL 103 val_103 +NULL NULL NULL NULL 103 val_103 +NULL NULL NULL NULL 104 val_104 +NULL NULL NULL NULL 104 val_104 +NULL NULL NULL NULL 105 val_105 +NULL NULL NULL NULL 11 val_11 +NULL NULL NULL NULL 111 val_111 +NULL NULL NULL NULL 113 val_113 +NULL NULL NULL NULL 113 val_113 +NULL NULL NULL NULL 114 val_114 +NULL NULL NULL NULL 116 val_116 +NULL NULL NULL NULL 118 val_118 +NULL NULL NULL NULL 118 val_118 +NULL NULL NULL NULL 119 val_119 +NULL NULL NULL NULL 119 val_119 +NULL NULL NULL NULL 119 val_119 +NULL NULL NULL NULL 12 val_12 +NULL NULL NULL NULL 12 val_12 +NULL NULL NULL NULL 120 val_120 +NULL NULL NULL NULL 120 val_120 +NULL NULL NULL NULL 125 val_125 +NULL NULL NULL NULL 125 val_125 +NULL NULL NULL NULL 126 val_126 +NULL NULL NULL NULL 128 val_128 +NULL NULL NULL NULL 128 val_128 +NULL NULL NULL NULL 128 val_128 +NULL NULL NULL NULL 129 val_129 +NULL NULL NULL NULL 129 val_129 +NULL NULL NULL NULL 131 val_131 +NULL NULL NULL NULL 133 val_133 +NULL NULL NULL NULL 134 val_134 +NULL NULL NULL NULL 134 val_134 +NULL NULL NULL NULL 136 val_136 +NULL NULL NULL NULL 137 val_137 +NULL NULL NULL NULL 137 val_137 +NULL NULL NULL NULL 138 val_138 +NULL NULL NULL NULL 138 val_138 +NULL NULL NULL NULL 138 val_138 +NULL NULL NULL NULL 138 val_138 +NULL NULL NULL NULL 143 val_143 +NULL NULL NULL NULL 145 val_145 +NULL NULL NULL NULL 146 val_146 +NULL NULL NULL NULL 146 val_146 +NULL NULL NULL NULL 149 val_149 +NULL NULL NULL NULL 149 val_149 +NULL NULL NULL NULL 15 val_15 +NULL NULL NULL NULL 15 val_15 +NULL NULL NULL NULL 150 val_150 +NULL NULL NULL NULL 152 val_152 +NULL NULL NULL NULL 152 val_152 +NULL NULL NULL NULL 153 val_153 +NULL NULL NULL NULL 155 val_155 +NULL NULL NULL NULL 156 val_156 +NULL NULL NULL NULL 157 val_157 +NULL NULL NULL NULL 158 val_158 +NULL NULL NULL NULL 160 val_160 +NULL NULL NULL NULL 162 val_162 +NULL NULL NULL NULL 163 val_163 +NULL NULL NULL NULL 164 val_164 +NULL NULL NULL NULL 164 val_164 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 166 val_166 +NULL NULL NULL NULL 167 val_167 +NULL NULL NULL NULL 167 val_167 +NULL NULL NULL NULL 167 val_167 +NULL NULL NULL NULL 168 val_168 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 17 val_17 +NULL NULL NULL NULL 170 val_170 +NULL NULL NULL NULL 172 val_172 +NULL NULL NULL NULL 172 val_172 +NULL NULL NULL NULL 174 val_174 +NULL NULL NULL NULL 174 val_174 +NULL NULL NULL NULL 175 val_175 +NULL NULL NULL NULL 175 val_175 +NULL NULL NULL NULL 176 val_176 +NULL NULL NULL NULL 176 val_176 +NULL NULL NULL NULL 177 val_177 +NULL NULL NULL NULL 178 val_178 +NULL NULL NULL NULL 179 val_179 +NULL NULL NULL NULL 179 val_179 +NULL NULL NULL NULL 18 val_18 +NULL NULL NULL NULL 18 val_18 +NULL NULL NULL NULL 180 val_180 +NULL NULL NULL NULL 181 val_181 +NULL NULL NULL NULL 183 val_183 +NULL NULL NULL NULL 186 val_186 +NULL NULL NULL NULL 187 val_187 +NULL NULL NULL NULL 187 val_187 +NULL NULL NULL NULL 187 val_187 +NULL NULL NULL NULL 189 val_189 +NULL NULL NULL NULL 19 val_19 +NULL NULL NULL NULL 190 val_190 +NULL NULL NULL NULL 191 val_191 +NULL NULL NULL NULL 191 val_191 +NULL NULL NULL NULL 192 val_192 +NULL NULL NULL NULL 193 
val_193 +NULL NULL NULL NULL 193 val_193 +NULL NULL NULL NULL 193 val_193 +NULL NULL NULL NULL 194 val_194 +NULL NULL NULL NULL 195 val_195 +NULL NULL NULL NULL 195 val_195 +NULL NULL NULL NULL 196 val_196 +NULL NULL NULL NULL 197 val_197 +NULL NULL NULL NULL 197 val_197 +NULL NULL NULL NULL 199 val_199 +NULL NULL NULL NULL 199 val_199 +NULL NULL NULL NULL 199 val_199 +NULL NULL NULL NULL 20 val_20 +NULL NULL NULL NULL 200 val_200 +NULL NULL NULL NULL 200 val_200 +NULL NULL NULL NULL 201 val_201 +NULL NULL NULL NULL 202 val_202 +NULL NULL NULL NULL 203 val_203 +NULL NULL NULL NULL 203 val_203 +NULL NULL NULL NULL 205 val_205 +NULL NULL NULL NULL 205 val_205 +NULL NULL NULL NULL 207 val_207 +NULL NULL NULL NULL 207 val_207 +NULL NULL NULL NULL 208 val_208 +NULL NULL NULL NULL 208 val_208 +NULL NULL NULL NULL 208 val_208 +NULL NULL NULL NULL 209 val_209 +NULL NULL NULL NULL 209 val_209 +NULL NULL NULL NULL 213 val_213 +NULL NULL NULL NULL 213 val_213 +NULL NULL NULL NULL 214 val_214 +NULL NULL NULL NULL 216 val_216 +NULL NULL NULL NULL 216 val_216 +NULL NULL NULL NULL 217 val_217 +NULL NULL NULL NULL 217 val_217 +NULL NULL NULL NULL 218 val_218 +NULL NULL NULL NULL 219 val_219 +NULL NULL NULL NULL 219 val_219 +NULL NULL NULL NULL 221 val_221 +NULL NULL NULL NULL 221 val_221 +NULL NULL NULL NULL 222 val_222 +NULL NULL NULL NULL 223 val_223 +NULL NULL NULL NULL 223 val_223 +NULL NULL NULL NULL 224 val_224 +NULL NULL NULL NULL 224 val_224 +NULL NULL NULL NULL 226 val_226 +NULL NULL NULL NULL 228 val_228 +NULL NULL NULL NULL 229 val_229 +NULL NULL NULL NULL 229 val_229 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 233 val_233 +NULL NULL NULL NULL 233 val_233 +NULL NULL NULL NULL 235 val_235 +NULL NULL NULL NULL 237 val_237 +NULL NULL NULL NULL 237 val_237 +NULL NULL NULL NULL 238 val_238 +NULL NULL NULL NULL 238 val_238 +NULL NULL NULL NULL 239 val_239 +NULL NULL NULL NULL 239 val_239 +NULL NULL NULL NULL 24 val_24 +NULL NULL NULL NULL 24 val_24 +NULL NULL NULL NULL 241 val_241 +NULL NULL NULL NULL 242 val_242 +NULL NULL NULL NULL 242 val_242 +NULL NULL NULL NULL 244 val_244 +NULL NULL NULL NULL 247 val_247 +NULL NULL NULL NULL 248 val_248 +NULL NULL NULL NULL 249 val_249 +NULL NULL NULL NULL 252 val_252 +NULL NULL NULL NULL 255 val_255 +NULL NULL NULL NULL 255 val_255 +NULL NULL NULL NULL 256 val_256 +NULL NULL NULL NULL 256 val_256 +NULL NULL NULL NULL 257 val_257 +NULL NULL NULL NULL 258 val_258 +NULL NULL NULL NULL 26 val_26 +NULL NULL NULL NULL 26 val_26 +NULL NULL NULL NULL 260 val_260 +NULL NULL NULL NULL 262 val_262 +NULL NULL NULL NULL 263 val_263 +NULL NULL NULL NULL 265 val_265 +NULL NULL NULL NULL 265 val_265 +NULL NULL NULL NULL 266 val_266 +NULL NULL NULL NULL 27 val_27 +NULL NULL NULL NULL 272 val_272 +NULL NULL NULL NULL 272 val_272 +NULL NULL NULL NULL 273 val_273 +NULL NULL NULL NULL 273 val_273 +NULL NULL NULL NULL 273 val_273 +NULL NULL NULL NULL 274 val_274 +NULL NULL NULL NULL 275 val_275 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 278 val_278 +NULL NULL NULL NULL 278 val_278 +NULL NULL NULL NULL 28 val_28 +NULL NULL NULL NULL 280 val_280 +NULL NULL NULL NULL 280 val_280 +NULL NULL NULL NULL 281 val_281 +NULL NULL NULL NULL 281 val_281 +NULL NULL NULL NULL 282 val_282 +NULL NULL NULL NULL 282 val_282 +NULL NULL NULL NULL 283 
val_283 +NULL NULL NULL NULL 284 val_284 +NULL NULL NULL NULL 285 val_285 +NULL NULL NULL NULL 286 val_286 +NULL NULL NULL NULL 287 val_287 +NULL NULL NULL NULL 288 val_288 +NULL NULL NULL NULL 288 val_288 +NULL NULL NULL NULL 289 val_289 +NULL NULL NULL NULL 291 val_291 +NULL NULL NULL NULL 292 val_292 +NULL NULL NULL NULL 296 val_296 +NULL NULL NULL NULL 298 val_298 +NULL NULL NULL NULL 298 val_298 +NULL NULL NULL NULL 298 val_298 +NULL NULL NULL NULL 30 val_30 +NULL NULL NULL NULL 302 val_302 +NULL NULL NULL NULL 305 val_305 +NULL NULL NULL NULL 306 val_306 +NULL NULL NULL NULL 307 val_307 +NULL NULL NULL NULL 307 val_307 +NULL NULL NULL NULL 308 val_308 +NULL NULL NULL NULL 309 val_309 +NULL NULL NULL NULL 309 val_309 +NULL NULL NULL NULL 310 val_310 +NULL NULL NULL NULL 311 val_311 +NULL NULL NULL NULL 311 val_311 +NULL NULL NULL NULL 311 val_311 +NULL NULL NULL NULL 315 val_315 +NULL NULL NULL NULL 316 val_316 +NULL NULL NULL NULL 316 val_316 +NULL NULL NULL NULL 316 val_316 +NULL NULL NULL NULL 317 val_317 +NULL NULL NULL NULL 317 val_317 +NULL NULL NULL NULL 318 val_318 +NULL NULL NULL NULL 318 val_318 +NULL NULL NULL NULL 318 val_318 +NULL NULL NULL NULL 321 val_321 +NULL NULL NULL NULL 321 val_321 +NULL NULL NULL NULL 322 val_322 +NULL NULL NULL NULL 322 val_322 +NULL NULL NULL NULL 323 val_323 +NULL NULL NULL NULL 325 val_325 +NULL NULL NULL NULL 325 val_325 +NULL NULL NULL NULL 327 val_327 +NULL NULL NULL NULL 327 val_327 +NULL NULL NULL NULL 327 val_327 +NULL NULL NULL NULL 33 val_33 +NULL NULL NULL NULL 331 val_331 +NULL NULL NULL NULL 331 val_331 +NULL NULL NULL NULL 332 val_332 +NULL NULL NULL NULL 333 val_333 +NULL NULL NULL NULL 333 val_333 +NULL NULL NULL NULL 335 val_335 +NULL NULL NULL NULL 336 val_336 +NULL NULL NULL NULL 338 val_338 +NULL NULL NULL NULL 339 val_339 +NULL NULL NULL NULL 34 val_34 +NULL NULL NULL NULL 341 val_341 +NULL NULL NULL NULL 342 val_342 +NULL NULL NULL NULL 342 val_342 +NULL NULL NULL NULL 344 val_344 +NULL NULL NULL NULL 344 val_344 +NULL NULL NULL NULL 345 val_345 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 35 val_35 +NULL NULL NULL NULL 35 val_35 +NULL NULL NULL NULL 35 val_35 +NULL NULL NULL NULL 351 val_351 +NULL NULL NULL NULL 353 val_353 +NULL NULL NULL NULL 353 val_353 +NULL NULL NULL NULL 356 val_356 +NULL NULL NULL NULL 360 val_360 +NULL NULL NULL NULL 362 val_362 +NULL NULL NULL NULL 364 val_364 +NULL NULL NULL NULL 365 val_365 +NULL NULL NULL NULL 366 val_366 +NULL NULL NULL NULL 367 val_367 +NULL NULL NULL NULL 367 val_367 +NULL NULL NULL NULL 368 val_368 +NULL NULL NULL NULL 369 val_369 +NULL NULL NULL NULL 369 val_369 +NULL NULL NULL NULL 369 val_369 +NULL NULL NULL NULL 37 val_37 +NULL NULL NULL NULL 37 val_37 +NULL NULL NULL NULL 373 val_373 +NULL NULL NULL NULL 374 val_374 +NULL NULL NULL NULL 375 val_375 +NULL NULL NULL NULL 377 val_377 +NULL NULL NULL NULL 378 val_378 +NULL NULL NULL NULL 379 val_379 +NULL NULL NULL NULL 382 val_382 +NULL NULL NULL NULL 382 val_382 +NULL NULL NULL NULL 384 val_384 +NULL NULL NULL NULL 384 val_384 +NULL NULL NULL NULL 384 val_384 +NULL NULL NULL NULL 386 val_386 +NULL NULL NULL NULL 389 val_389 +NULL NULL NULL NULL 392 val_392 +NULL NULL NULL NULL 393 val_393 +NULL NULL NULL NULL 394 val_394 +NULL NULL NULL NULL 395 val_395 +NULL NULL NULL NULL 395 val_395 +NULL NULL NULL NULL 396 val_396 +NULL NULL NULL NULL 396 val_396 +NULL NULL NULL NULL 396 
val_396 +NULL NULL NULL NULL 397 val_397 +NULL NULL NULL NULL 397 val_397 +NULL NULL NULL NULL 399 val_399 +NULL NULL NULL NULL 399 val_399 +NULL NULL NULL NULL 400 val_400 +NULL NULL NULL NULL 401 val_401 +NULL NULL NULL NULL 401 val_401 +NULL NULL NULL NULL 401 val_401 +NULL NULL NULL NULL 401 val_401 +NULL NULL NULL NULL 401 val_401 +NULL NULL NULL NULL 402 val_402 +NULL NULL NULL NULL 403 val_403 +NULL NULL NULL NULL 403 val_403 +NULL NULL NULL NULL 403 val_403 +NULL NULL NULL NULL 404 val_404 +NULL NULL NULL NULL 404 val_404 +NULL NULL NULL NULL 406 val_406 +NULL NULL NULL NULL 406 val_406 +NULL NULL NULL NULL 406 val_406 +NULL NULL NULL NULL 406 val_406 +NULL NULL NULL NULL 407 val_407 +NULL NULL NULL NULL 409 val_409 +NULL NULL NULL NULL 409 val_409 +NULL NULL NULL NULL 409 val_409 +NULL NULL NULL NULL 41 val_41 +NULL NULL NULL NULL 411 val_411 +NULL NULL NULL NULL 413 val_413 +NULL NULL NULL NULL 413 val_413 +NULL NULL NULL NULL 414 val_414 +NULL NULL NULL NULL 414 val_414 +NULL NULL NULL NULL 417 val_417 +NULL NULL NULL NULL 417 val_417 +NULL NULL NULL NULL 417 val_417 +NULL NULL NULL NULL 418 val_418 +NULL NULL NULL NULL 419 val_419 +NULL NULL NULL NULL 42 val_42 +NULL NULL NULL NULL 42 val_42 +NULL NULL NULL NULL 421 val_421 +NULL NULL NULL NULL 424 val_424 +NULL NULL NULL NULL 424 val_424 +NULL NULL NULL NULL 427 val_427 +NULL NULL NULL NULL 429 val_429 +NULL NULL NULL NULL 429 val_429 +NULL NULL NULL NULL 43 val_43 +NULL NULL NULL NULL 430 val_430 +NULL NULL NULL NULL 430 val_430 +NULL NULL NULL NULL 430 val_430 +NULL NULL NULL NULL 431 val_431 +NULL NULL NULL NULL 431 val_431 +NULL NULL NULL NULL 431 val_431 +NULL NULL NULL NULL 432 val_432 +NULL NULL NULL NULL 435 val_435 +NULL NULL NULL NULL 436 val_436 +NULL NULL NULL NULL 437 val_437 +NULL NULL NULL NULL 438 val_438 +NULL NULL NULL NULL 438 val_438 +NULL NULL NULL NULL 438 val_438 +NULL NULL NULL NULL 439 val_439 +NULL NULL NULL NULL 439 val_439 +NULL NULL NULL NULL 44 val_44 +NULL NULL NULL NULL 443 val_443 +NULL NULL NULL NULL 444 val_444 +NULL NULL NULL NULL 446 val_446 +NULL NULL NULL NULL 448 val_448 +NULL NULL NULL NULL 449 val_449 +NULL NULL NULL NULL 452 val_452 +NULL NULL NULL NULL 453 val_453 +NULL NULL NULL NULL 454 val_454 +NULL NULL NULL NULL 454 val_454 +NULL NULL NULL NULL 454 val_454 +NULL NULL NULL NULL 455 val_455 +NULL NULL NULL NULL 457 val_457 +NULL NULL NULL NULL 458 val_458 +NULL NULL NULL NULL 458 val_458 +NULL NULL NULL NULL 459 val_459 +NULL NULL NULL NULL 459 val_459 +NULL NULL NULL NULL 460 val_460 +NULL NULL NULL NULL 462 val_462 +NULL NULL NULL NULL 462 val_462 +NULL NULL NULL NULL 463 val_463 +NULL NULL NULL NULL 463 val_463 +NULL NULL NULL NULL 466 val_466 +NULL NULL NULL NULL 466 val_466 +NULL NULL NULL NULL 466 val_466 +NULL NULL NULL NULL 467 val_467 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 47 val_47 +NULL NULL NULL NULL 470 val_470 +NULL NULL NULL NULL 472 val_472 +NULL NULL NULL NULL 475 val_475 +NULL NULL NULL NULL 477 val_477 +NULL NULL NULL NULL 478 val_478 +NULL NULL NULL NULL 478 val_478 +NULL NULL NULL NULL 479 val_479 +NULL NULL NULL NULL 480 val_480 +NULL NULL NULL NULL 480 val_480 +NULL NULL NULL NULL 480 val_480 +NULL NULL NULL NULL 481 val_481 +NULL NULL NULL NULL 482 val_482 +NULL NULL NULL NULL 483 
val_483 +NULL NULL NULL NULL 484 val_484 +NULL NULL NULL NULL 485 val_485 +NULL NULL NULL NULL 487 val_487 +NULL NULL NULL NULL 489 val_489 +NULL NULL NULL NULL 489 val_489 +NULL NULL NULL NULL 489 val_489 +NULL NULL NULL NULL 489 val_489 +NULL NULL NULL NULL 490 val_490 +NULL NULL NULL NULL 491 val_491 +NULL NULL NULL NULL 492 val_492 +NULL NULL NULL NULL 492 val_492 +NULL NULL NULL NULL 493 val_493 +NULL NULL NULL NULL 494 val_494 +NULL NULL NULL NULL 495 val_495 +NULL NULL NULL NULL 496 val_496 +NULL NULL NULL NULL 497 val_497 +NULL NULL NULL NULL 498 val_498 +NULL NULL NULL NULL 498 val_498 +NULL NULL NULL NULL 498 val_498 +NULL NULL NULL NULL 51 val_51 +NULL NULL NULL NULL 51 val_51 +NULL NULL NULL NULL 53 val_53 +NULL NULL NULL NULL 54 val_54 +NULL NULL NULL NULL 57 val_57 +NULL NULL NULL NULL 58 val_58 +NULL NULL NULL NULL 58 val_58 +NULL NULL NULL NULL 64 val_64 +NULL NULL NULL NULL 65 val_65 +NULL NULL NULL NULL 66 val_66 +NULL NULL NULL NULL 67 val_67 +NULL NULL NULL NULL 67 val_67 +NULL NULL NULL NULL 69 val_69 +NULL NULL NULL NULL 70 val_70 +NULL NULL NULL NULL 70 val_70 +NULL NULL NULL NULL 70 val_70 +NULL NULL NULL NULL 72 val_72 +NULL NULL NULL NULL 72 val_72 +NULL NULL NULL NULL 74 val_74 +NULL NULL NULL NULL 76 val_76 +NULL NULL NULL NULL 76 val_76 +NULL NULL NULL NULL 77 val_77 +NULL NULL NULL NULL 78 val_78 +NULL NULL NULL NULL 80 val_80 +NULL NULL NULL NULL 82 val_82 +NULL NULL NULL NULL 83 val_83 +NULL NULL NULL NULL 83 val_83 +NULL NULL NULL NULL 84 val_84 +NULL NULL NULL NULL 84 val_84 +NULL NULL NULL NULL 85 val_85 +NULL NULL NULL NULL 86 val_86 +NULL NULL NULL NULL 87 val_87 +NULL NULL NULL NULL 90 val_90 +NULL NULL NULL NULL 90 val_90 +NULL NULL NULL NULL 90 val_90 +NULL NULL NULL NULL 92 val_92 +NULL NULL NULL NULL 95 val_95 +NULL NULL NULL NULL 95 val_95 +NULL NULL NULL NULL 96 val_96 +NULL NULL NULL NULL 97 val_97 +NULL NULL NULL NULL 97 val_97 +NULL NULL NULL NULL 98 val_98 +NULL NULL NULL NULL 98 val_98 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 2 val_2 2 val_2 +NULL NULL 4 val_4 4 val_4 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 8 val_8 8 val_8 +NULL NULL 9 val_9 9 val_9 +PREHOOK: query: explain +SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key < 10) and (key > 10)) (type: boolean)
+                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: string)
+                      sort order: +
+                      Map-reduce partition columns: key (type: string)
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: value (type: string)
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: src3
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key > 10) and (key < 10)) (type: boolean)
+                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: string)
+                      sort order: +
+                      Map-reduce partition columns: key (type: string)
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: value (type: string)
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key > 10) and (key < 10)) (type: boolean)
+                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: string)
+                      sort order: +
+                      Map-reduce partition columns: key (type: string)
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: value (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                     Left Outer Join1 to 2
+                condition expressions:
+                  0 {KEY.reducesinkkey0} {VALUE._col0}
+                  1 {KEY.reducesinkkey0} {VALUE._col0}
+                  2 {KEY.reducesinkkey0} {VALUE._col0}
+                outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
+                Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+                    sort order: ++++++
+                    Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+PREHOOK: query: explain
+SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key < 10) and (key > 10)) (type: boolean)
+                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: string)
+                      sort order: +
+                      Map-reduce partition columns: key (type: string)
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: value (type: string)
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: src3
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: key (type: string)
+                    sort order: +
+                    Map-reduce partition columns: key (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: value (type: string)
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key > 10) and (key < 10)) (type: boolean)
+                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: string)
+                      sort order: +
+                      Map-reduce partition columns: key (type: string)
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: value (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                     Right Outer Join1 to 2
+                condition expressions:
+                  0 {KEY.reducesinkkey0} {VALUE._col0}
+                  1 {KEY.reducesinkkey0} {VALUE._col0}
+                  2 {KEY.reducesinkkey0} {VALUE._col0}
+                filter predicates:
+                  0 
+                  1 
+                  2 {(KEY.reducesinkkey0 < 10)}
+                outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
+                Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
+                    sort order: ++++++
+                    Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL NULL NULL NULL 0 val_0
+NULL NULL NULL NULL 0 val_0
+NULL NULL NULL NULL 0 val_0
+NULL NULL NULL NULL 10 val_10
+NULL NULL NULL NULL 100 val_100
+NULL NULL NULL NULL 100 val_100
+NULL NULL NULL NULL 103 val_103
+NULL NULL NULL NULL 103 val_103
+NULL NULL NULL NULL 104 val_104
+NULL NULL NULL NULL 104 val_104
+NULL NULL NULL NULL 105 val_105
+NULL NULL NULL NULL 11 val_11
+NULL NULL NULL NULL 111 val_111
+NULL NULL NULL NULL 113 val_113
+NULL NULL NULL NULL 113 val_113
+NULL NULL NULL NULL 114 val_114
+NULL NULL NULL NULL 116 val_116
+NULL NULL NULL NULL 118 val_118
+NULL NULL NULL NULL 118 val_118
+NULL NULL NULL NULL 119 val_119
+NULL NULL NULL NULL 119 val_119
+NULL NULL NULL NULL 119 val_119
+NULL NULL NULL NULL 12 val_12
+NULL NULL NULL NULL 12 val_12
+NULL NULL NULL NULL 120 val_120
+NULL NULL NULL NULL 120 val_120
+NULL NULL NULL NULL 125 val_125
+NULL NULL NULL NULL 125 val_125
+NULL NULL NULL NULL 126 val_126
+NULL NULL NULL NULL 128 val_128
+NULL NULL NULL NULL 128 val_128
+NULL NULL NULL NULL 128 val_128
+NULL NULL NULL NULL 129 val_129
+NULL NULL NULL NULL 129 val_129
+NULL NULL NULL NULL 131 val_131
+NULL NULL NULL NULL 133 val_133
+NULL NULL NULL NULL 134 val_134
+NULL NULL NULL NULL 134 val_134
+NULL NULL NULL NULL 136 val_136
+NULL NULL NULL NULL 137 val_137
+NULL NULL NULL NULL 137 val_137
+NULL NULL NULL NULL 138 val_138
+NULL NULL NULL NULL 138 val_138
+NULL NULL NULL NULL 138 val_138
+NULL NULL NULL NULL 138 val_138
+NULL NULL NULL NULL 143 val_143
+NULL NULL NULL NULL 145 val_145 +NULL NULL NULL NULL 146 val_146 +NULL NULL NULL NULL 146 val_146 +NULL NULL NULL NULL 149 val_149 +NULL NULL NULL NULL 149 val_149 +NULL NULL NULL NULL 15 val_15 +NULL NULL NULL NULL 15 val_15 +NULL NULL NULL NULL 150 val_150 +NULL NULL NULL NULL 152 val_152 +NULL NULL NULL NULL 152 val_152 +NULL NULL NULL NULL 153 val_153 +NULL NULL NULL NULL 155 val_155 +NULL NULL NULL NULL 156 val_156 +NULL NULL NULL NULL 157 val_157 +NULL NULL NULL NULL 158 val_158 +NULL NULL NULL NULL 160 val_160 +NULL NULL NULL NULL 162 val_162 +NULL NULL NULL NULL 163 val_163 +NULL NULL NULL NULL 164 val_164 +NULL NULL NULL NULL 164 val_164 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 166 val_166 +NULL NULL NULL NULL 167 val_167 +NULL NULL NULL NULL 167 val_167 +NULL NULL NULL NULL 167 val_167 +NULL NULL NULL NULL 168 val_168 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 17 val_17 +NULL NULL NULL NULL 170 val_170 +NULL NULL NULL NULL 172 val_172 +NULL NULL NULL NULL 172 val_172 +NULL NULL NULL NULL 174 val_174 +NULL NULL NULL NULL 174 val_174 +NULL NULL NULL NULL 175 val_175 +NULL NULL NULL NULL 175 val_175 +NULL NULL NULL NULL 176 val_176 +NULL NULL NULL NULL 176 val_176 +NULL NULL NULL NULL 177 val_177 +NULL NULL NULL NULL 178 val_178 +NULL NULL NULL NULL 179 val_179 +NULL NULL NULL NULL 179 val_179 +NULL NULL NULL NULL 18 val_18 +NULL NULL NULL NULL 18 val_18 +NULL NULL NULL NULL 180 val_180 +NULL NULL NULL NULL 181 val_181 +NULL NULL NULL NULL 183 val_183 +NULL NULL NULL NULL 186 val_186 +NULL NULL NULL NULL 187 val_187 +NULL NULL NULL NULL 187 val_187 +NULL NULL NULL NULL 187 val_187 +NULL NULL NULL NULL 189 val_189 +NULL NULL NULL NULL 19 val_19 +NULL NULL NULL NULL 190 val_190 +NULL NULL NULL NULL 191 val_191 +NULL NULL NULL NULL 191 val_191 +NULL NULL NULL NULL 192 val_192 +NULL NULL NULL NULL 193 val_193 +NULL NULL NULL NULL 193 val_193 +NULL NULL NULL NULL 193 val_193 +NULL NULL NULL NULL 194 val_194 +NULL NULL NULL NULL 195 val_195 +NULL NULL NULL NULL 195 val_195 +NULL NULL NULL NULL 196 val_196 +NULL NULL NULL NULL 197 val_197 +NULL NULL NULL NULL 197 val_197 +NULL NULL NULL NULL 199 val_199 +NULL NULL NULL NULL 199 val_199 +NULL NULL NULL NULL 199 val_199 +NULL NULL NULL NULL 2 val_2 +NULL NULL NULL NULL 20 val_20 +NULL NULL NULL NULL 200 val_200 +NULL NULL NULL NULL 200 val_200 +NULL NULL NULL NULL 201 val_201 +NULL NULL NULL NULL 202 val_202 +NULL NULL NULL NULL 203 val_203 +NULL NULL NULL NULL 203 val_203 +NULL NULL NULL NULL 205 val_205 +NULL NULL NULL NULL 205 val_205 +NULL NULL NULL NULL 207 val_207 +NULL NULL NULL NULL 207 val_207 +NULL NULL NULL NULL 208 val_208 +NULL NULL NULL NULL 208 val_208 +NULL NULL NULL NULL 208 val_208 +NULL NULL NULL NULL 209 val_209 +NULL NULL NULL NULL 209 val_209 +NULL NULL NULL NULL 213 val_213 +NULL NULL NULL NULL 213 val_213 +NULL NULL NULL NULL 214 val_214 +NULL NULL NULL NULL 216 val_216 +NULL NULL NULL NULL 216 val_216 +NULL NULL NULL NULL 217 val_217 +NULL NULL NULL NULL 217 val_217 +NULL NULL NULL NULL 218 val_218 +NULL NULL NULL NULL 219 val_219 +NULL NULL NULL NULL 219 val_219 +NULL NULL NULL NULL 221 val_221 +NULL NULL NULL NULL 221 val_221 +NULL NULL NULL NULL 222 val_222 +NULL NULL NULL NULL 223 val_223 +NULL NULL NULL NULL 223 val_223 +NULL NULL NULL NULL 224 val_224 +NULL NULL NULL NULL 224 val_224 +NULL NULL NULL NULL 226 val_226 +NULL NULL NULL NULL 228 val_228 +NULL 
NULL NULL NULL 229 val_229 +NULL NULL NULL NULL 229 val_229 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 233 val_233 +NULL NULL NULL NULL 233 val_233 +NULL NULL NULL NULL 235 val_235 +NULL NULL NULL NULL 237 val_237 +NULL NULL NULL NULL 237 val_237 +NULL NULL NULL NULL 238 val_238 +NULL NULL NULL NULL 238 val_238 +NULL NULL NULL NULL 239 val_239 +NULL NULL NULL NULL 239 val_239 +NULL NULL NULL NULL 24 val_24 +NULL NULL NULL NULL 24 val_24 +NULL NULL NULL NULL 241 val_241 +NULL NULL NULL NULL 242 val_242 +NULL NULL NULL NULL 242 val_242 +NULL NULL NULL NULL 244 val_244 +NULL NULL NULL NULL 247 val_247 +NULL NULL NULL NULL 248 val_248 +NULL NULL NULL NULL 249 val_249 +NULL NULL NULL NULL 252 val_252 +NULL NULL NULL NULL 255 val_255 +NULL NULL NULL NULL 255 val_255 +NULL NULL NULL NULL 256 val_256 +NULL NULL NULL NULL 256 val_256 +NULL NULL NULL NULL 257 val_257 +NULL NULL NULL NULL 258 val_258 +NULL NULL NULL NULL 26 val_26 +NULL NULL NULL NULL 26 val_26 +NULL NULL NULL NULL 260 val_260 +NULL NULL NULL NULL 262 val_262 +NULL NULL NULL NULL 263 val_263 +NULL NULL NULL NULL 265 val_265 +NULL NULL NULL NULL 265 val_265 +NULL NULL NULL NULL 266 val_266 +NULL NULL NULL NULL 27 val_27 +NULL NULL NULL NULL 272 val_272 +NULL NULL NULL NULL 272 val_272 +NULL NULL NULL NULL 273 val_273 +NULL NULL NULL NULL 273 val_273 +NULL NULL NULL NULL 273 val_273 +NULL NULL NULL NULL 274 val_274 +NULL NULL NULL NULL 275 val_275 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 278 val_278 +NULL NULL NULL NULL 278 val_278 +NULL NULL NULL NULL 28 val_28 +NULL NULL NULL NULL 280 val_280 +NULL NULL NULL NULL 280 val_280 +NULL NULL NULL NULL 281 val_281 +NULL NULL NULL NULL 281 val_281 +NULL NULL NULL NULL 282 val_282 +NULL NULL NULL NULL 282 val_282 +NULL NULL NULL NULL 283 val_283 +NULL NULL NULL NULL 284 val_284 +NULL NULL NULL NULL 285 val_285 +NULL NULL NULL NULL 286 val_286 +NULL NULL NULL NULL 287 val_287 +NULL NULL NULL NULL 288 val_288 +NULL NULL NULL NULL 288 val_288 +NULL NULL NULL NULL 289 val_289 +NULL NULL NULL NULL 291 val_291 +NULL NULL NULL NULL 292 val_292 +NULL NULL NULL NULL 296 val_296 +NULL NULL NULL NULL 298 val_298 +NULL NULL NULL NULL 298 val_298 +NULL NULL NULL NULL 298 val_298 +NULL NULL NULL NULL 30 val_30 +NULL NULL NULL NULL 302 val_302 +NULL NULL NULL NULL 305 val_305 +NULL NULL NULL NULL 306 val_306 +NULL NULL NULL NULL 307 val_307 +NULL NULL NULL NULL 307 val_307 +NULL NULL NULL NULL 308 val_308 +NULL NULL NULL NULL 309 val_309 +NULL NULL NULL NULL 309 val_309 +NULL NULL NULL NULL 310 val_310 +NULL NULL NULL NULL 311 val_311 +NULL NULL NULL NULL 311 val_311 +NULL NULL NULL NULL 311 val_311 +NULL NULL NULL NULL 315 val_315 +NULL NULL NULL NULL 316 val_316 +NULL NULL NULL NULL 316 val_316 +NULL NULL NULL NULL 316 val_316 +NULL NULL NULL NULL 317 val_317 +NULL NULL NULL NULL 317 val_317 +NULL NULL NULL NULL 318 val_318 +NULL NULL NULL NULL 318 val_318 +NULL NULL NULL NULL 318 val_318 +NULL NULL NULL NULL 321 val_321 +NULL NULL NULL NULL 321 val_321 +NULL NULL NULL NULL 322 val_322 +NULL NULL NULL NULL 322 val_322 +NULL NULL NULL NULL 323 val_323 +NULL NULL NULL NULL 325 val_325 +NULL NULL NULL NULL 325 val_325 +NULL NULL NULL NULL 327 val_327 +NULL NULL NULL NULL 327 val_327 +NULL NULL NULL NULL 327 val_327 +NULL NULL NULL NULL 33 val_33 +NULL NULL 
NULL NULL 331 val_331 +NULL NULL NULL NULL 331 val_331 +NULL NULL NULL NULL 332 val_332 +NULL NULL NULL NULL 333 val_333 +NULL NULL NULL NULL 333 val_333 +NULL NULL NULL NULL 335 val_335 +NULL NULL NULL NULL 336 val_336 +NULL NULL NULL NULL 338 val_338 +NULL NULL NULL NULL 339 val_339 +NULL NULL NULL NULL 34 val_34 +NULL NULL NULL NULL 341 val_341 +NULL NULL NULL NULL 342 val_342 +NULL NULL NULL NULL 342 val_342 +NULL NULL NULL NULL 344 val_344 +NULL NULL NULL NULL 344 val_344 +NULL NULL NULL NULL 345 val_345 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 35 val_35 +NULL NULL NULL NULL 35 val_35 +NULL NULL NULL NULL 35 val_35 +NULL NULL NULL NULL 351 val_351 +NULL NULL NULL NULL 353 val_353 +NULL NULL NULL NULL 353 val_353 +NULL NULL NULL NULL 356 val_356 +NULL NULL NULL NULL 360 val_360 +NULL NULL NULL NULL 362 val_362 +NULL NULL NULL NULL 364 val_364 +NULL NULL NULL NULL 365 val_365 +NULL NULL NULL NULL 366 val_366 +NULL NULL NULL NULL 367 val_367 +NULL NULL NULL NULL 367 val_367 +NULL NULL NULL NULL 368 val_368 +NULL NULL NULL NULL 369 val_369 +NULL NULL NULL NULL 369 val_369 +NULL NULL NULL NULL 369 val_369 +NULL NULL NULL NULL 37 val_37 +NULL NULL NULL NULL 37 val_37 +NULL NULL NULL NULL 373 val_373 +NULL NULL NULL NULL 374 val_374 +NULL NULL NULL NULL 375 val_375 +NULL NULL NULL NULL 377 val_377 +NULL NULL NULL NULL 378 val_378 +NULL NULL NULL NULL 379 val_379 +NULL NULL NULL NULL 382 val_382 +NULL NULL NULL NULL 382 val_382 +NULL NULL NULL NULL 384 val_384 +NULL NULL NULL NULL 384 val_384 +NULL NULL NULL NULL 384 val_384 +NULL NULL NULL NULL 386 val_386 +NULL NULL NULL NULL 389 val_389 +NULL NULL NULL NULL 392 val_392 +NULL NULL NULL NULL 393 val_393 +NULL NULL NULL NULL 394 val_394 +NULL NULL NULL NULL 395 val_395 +NULL NULL NULL NULL 395 val_395 +NULL NULL NULL NULL 396 val_396 +NULL NULL NULL NULL 396 val_396 +NULL NULL NULL NULL 396 val_396 +NULL NULL NULL NULL 397 val_397 +NULL NULL NULL NULL 397 val_397 +NULL NULL NULL NULL 399 val_399 +NULL NULL NULL NULL 399 val_399 +NULL NULL NULL NULL 4 val_4 +NULL NULL NULL NULL 400 val_400 +NULL NULL NULL NULL 401 val_401 +NULL NULL NULL NULL 401 val_401 +NULL NULL NULL NULL 401 val_401 +NULL NULL NULL NULL 401 val_401 +NULL NULL NULL NULL 401 val_401 +NULL NULL NULL NULL 402 val_402 +NULL NULL NULL NULL 403 val_403 +NULL NULL NULL NULL 403 val_403 +NULL NULL NULL NULL 403 val_403 +NULL NULL NULL NULL 404 val_404 +NULL NULL NULL NULL 404 val_404 +NULL NULL NULL NULL 406 val_406 +NULL NULL NULL NULL 406 val_406 +NULL NULL NULL NULL 406 val_406 +NULL NULL NULL NULL 406 val_406 +NULL NULL NULL NULL 407 val_407 +NULL NULL NULL NULL 409 val_409 +NULL NULL NULL NULL 409 val_409 +NULL NULL NULL NULL 409 val_409 +NULL NULL NULL NULL 41 val_41 +NULL NULL NULL NULL 411 val_411 +NULL NULL NULL NULL 413 val_413 +NULL NULL NULL NULL 413 val_413 +NULL NULL NULL NULL 414 val_414 +NULL NULL NULL NULL 414 val_414 +NULL NULL NULL NULL 417 val_417 +NULL NULL NULL NULL 417 val_417 +NULL NULL NULL NULL 417 val_417 +NULL NULL NULL NULL 418 val_418 +NULL NULL NULL NULL 419 val_419 +NULL NULL NULL NULL 42 val_42 +NULL NULL NULL NULL 42 val_42 +NULL NULL NULL NULL 421 val_421 +NULL NULL NULL NULL 424 val_424 +NULL NULL NULL NULL 424 val_424 +NULL NULL NULL NULL 427 val_427 +NULL NULL NULL NULL 429 val_429 +NULL NULL NULL NULL 429 val_429 +NULL NULL NULL NULL 43 val_43 +NULL NULL NULL NULL 430 val_430 +NULL NULL NULL NULL 430 
val_430 +NULL NULL NULL NULL 430 val_430 +NULL NULL NULL NULL 431 val_431 +NULL NULL NULL NULL 431 val_431 +NULL NULL NULL NULL 431 val_431 +NULL NULL NULL NULL 432 val_432 +NULL NULL NULL NULL 435 val_435 +NULL NULL NULL NULL 436 val_436 +NULL NULL NULL NULL 437 val_437 +NULL NULL NULL NULL 438 val_438 +NULL NULL NULL NULL 438 val_438 +NULL NULL NULL NULL 438 val_438 +NULL NULL NULL NULL 439 val_439 +NULL NULL NULL NULL 439 val_439 +NULL NULL NULL NULL 44 val_44 +NULL NULL NULL NULL 443 val_443 +NULL NULL NULL NULL 444 val_444 +NULL NULL NULL NULL 446 val_446 +NULL NULL NULL NULL 448 val_448 +NULL NULL NULL NULL 449 val_449 +NULL NULL NULL NULL 452 val_452 +NULL NULL NULL NULL 453 val_453 +NULL NULL NULL NULL 454 val_454 +NULL NULL NULL NULL 454 val_454 +NULL NULL NULL NULL 454 val_454 +NULL NULL NULL NULL 455 val_455 +NULL NULL NULL NULL 457 val_457 +NULL NULL NULL NULL 458 val_458 +NULL NULL NULL NULL 458 val_458 +NULL NULL NULL NULL 459 val_459 +NULL NULL NULL NULL 459 val_459 +NULL NULL NULL NULL 460 val_460 +NULL NULL NULL NULL 462 val_462 +NULL NULL NULL NULL 462 val_462 +NULL NULL NULL NULL 463 val_463 +NULL NULL NULL NULL 463 val_463 +NULL NULL NULL NULL 466 val_466 +NULL NULL NULL NULL 466 val_466 +NULL NULL NULL NULL 466 val_466 +NULL NULL NULL NULL 467 val_467 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 47 val_47 +NULL NULL NULL NULL 470 val_470 +NULL NULL NULL NULL 472 val_472 +NULL NULL NULL NULL 475 val_475 +NULL NULL NULL NULL 477 val_477 +NULL NULL NULL NULL 478 val_478 +NULL NULL NULL NULL 478 val_478 +NULL NULL NULL NULL 479 val_479 +NULL NULL NULL NULL 480 val_480 +NULL NULL NULL NULL 480 val_480 +NULL NULL NULL NULL 480 val_480 +NULL NULL NULL NULL 481 val_481 +NULL NULL NULL NULL 482 val_482 +NULL NULL NULL NULL 483 val_483 +NULL NULL NULL NULL 484 val_484 +NULL NULL NULL NULL 485 val_485 +NULL NULL NULL NULL 487 val_487 +NULL NULL NULL NULL 489 val_489 +NULL NULL NULL NULL 489 val_489 +NULL NULL NULL NULL 489 val_489 +NULL NULL NULL NULL 489 val_489 +NULL NULL NULL NULL 490 val_490 +NULL NULL NULL NULL 491 val_491 +NULL NULL NULL NULL 492 val_492 +NULL NULL NULL NULL 492 val_492 +NULL NULL NULL NULL 493 val_493 +NULL NULL NULL NULL 494 val_494 +NULL NULL NULL NULL 495 val_495 +NULL NULL NULL NULL 496 val_496 +NULL NULL NULL NULL 497 val_497 +NULL NULL NULL NULL 498 val_498 +NULL NULL NULL NULL 498 val_498 +NULL NULL NULL NULL 498 val_498 +NULL NULL NULL NULL 5 val_5 +NULL NULL NULL NULL 5 val_5 +NULL NULL NULL NULL 5 val_5 +NULL NULL NULL NULL 51 val_51 +NULL NULL NULL NULL 51 val_51 +NULL NULL NULL NULL 53 val_53 +NULL NULL NULL NULL 54 val_54 +NULL NULL NULL NULL 57 val_57 +NULL NULL NULL NULL 58 val_58 +NULL NULL NULL NULL 58 val_58 +NULL NULL NULL NULL 64 val_64 +NULL NULL NULL NULL 65 val_65 +NULL NULL NULL NULL 66 val_66 +NULL NULL NULL NULL 67 val_67 +NULL NULL NULL NULL 67 val_67 +NULL NULL NULL NULL 69 val_69 +NULL NULL NULL NULL 70 val_70 +NULL NULL NULL NULL 70 val_70 +NULL NULL NULL NULL 70 val_70 +NULL NULL NULL NULL 72 val_72 +NULL NULL NULL NULL 72 val_72 +NULL NULL NULL NULL 74 val_74 +NULL NULL NULL NULL 76 val_76 +NULL NULL NULL NULL 76 val_76 +NULL NULL NULL NULL 77 val_77 +NULL NULL NULL NULL 78 val_78 +NULL NULL NULL NULL 8 val_8 +NULL NULL NULL NULL 80 val_80 +NULL NULL NULL 
NULL 82 val_82 +NULL NULL NULL NULL 83 val_83 +NULL NULL NULL NULL 83 val_83 +NULL NULL NULL NULL 84 val_84 +NULL NULL NULL NULL 84 val_84 +NULL NULL NULL NULL 85 val_85 +NULL NULL NULL NULL 86 val_86 +NULL NULL NULL NULL 87 val_87 +NULL NULL NULL NULL 9 val_9 +NULL NULL NULL NULL 90 val_90 +NULL NULL NULL NULL 90 val_90 +NULL NULL NULL NULL 90 val_90 +NULL NULL NULL NULL 92 val_92 +NULL NULL NULL NULL 95 val_95 +NULL NULL NULL NULL 95 val_95 +NULL NULL NULL NULL 96 val_96 +NULL NULL NULL NULL 97 val_97 +NULL NULL NULL NULL 97 val_97 +NULL NULL NULL NULL 98 val_98 +NULL NULL NULL NULL 98 val_98 +PREHOOK: query: explain +SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 10) and (key > 10)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 10)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 {(KEY.reducesinkkey0 < 10)} + 1 + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: 
string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: explain +SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) 
+ Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 > 10)} + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, 
src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 2 val_2 2 val_2 +NULL NULL 4 val_4 4 val_4 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 8 val_8 8 val_8 +NULL NULL 9 val_9 9 val_9 +PREHOOK: query: explain +SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 10) and key is not null) and (key > 10)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key > 10) and key is not null) and (key < 10)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key > 10) and key is not null) and (key < 10)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} 
{VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 59 Data size: 629 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 59 Data size: 629 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 59 Data size: 629 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 59 Data size: 629 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 629 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/auto_join3.q.out ql/src/test/results/clientpositive/spark/auto_join3.q.out new file mode 100644 index 0000000..64940de --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join3.q.out @@ -0,0 +1,132 @@ +PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: explain +FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL 
SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + 2 {VALUE._col0} + outputColumnNames: _col0, _col11 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value SIMPLE [(src)src3.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +344360994461 diff --git ql/src/test/results/clientpositive/spark/auto_join30.q.out ql/src/test/results/clientpositive/spark/auto_join30.q.out new file mode 100644 index 0000000..98b3974 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join30.q.out @@ -0,0 +1,1391 @@ +PREHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} + 
outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT src.* FROM src sort by key) x +JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT src.* FROM src sort by key) x +JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +103231310608 +PREHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT 
src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +103231310608 +PREHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +103231310608 +PREHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 8 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP SORT, 1) + Reducer 8 <- Map 7 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column 
stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 + outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 8 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: 
string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT src.* FROM src sort by key) x +JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT src.* FROM src sort by key) x +JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +348019368476 +PREHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 8 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP SORT, 1) + Reducer 8 <- Map 7 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join0 to 2 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 8 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT src.* FROM src sort by key) x +JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT src.* FROM src sort by key) x +JOIN +(SELECT src.* FROM src sort by 
value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +348019368476 +PREHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 8 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP SORT, 1) + Reducer 8 <- Map 7 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column 
stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 8 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +348019368476 +PREHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE 
DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 8 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP SORT, 1) + Reducer 8 <- Map 7 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: 
bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 8 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +348019368476 +PREHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 8 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP SORT, 1) + Reducer 8 <- Map 7 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 
500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 8 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +348019368476 diff --git ql/src/test/results/clientpositive/spark/auto_join31.q.out ql/src/test/results/clientpositive/spark/auto_join31.q.out new file mode 100644 index 0000000..df502c8 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join31.q.out @@ -0,0 +1,187 @@ +PREHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 8 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP SORT, 1) + Reducer 8 <- Map 7 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: 
+ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 8 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + + 
Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +348019368476 diff --git ql/src/test/results/clientpositive/spark/auto_join32.q.out ql/src/test/results/clientpositive/spark/auto_join32.q.out new file mode 100644 index 0000000..8d83188 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join32.q.out @@ -0,0 +1,615 @@ +PREHOOK: query: -- empty tables +create table studenttab10k (name string, age int, gpa double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@studenttab10k +POSTHOOK: query: -- empty tables +create table studenttab10k (name string, age int, gpa double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@studenttab10k +PREHOOK: query: create table votertab10k (name string, age int, registration string, contributions float) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@votertab10k +POSTHOOK: query: create table votertab10k (name string, age int, registration string, contributions float) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@votertab10k +PREHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +POSTHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: v + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: name (type: string) + sort order: + + Map-reduce partition columns: name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: registration (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: name (type: string) + sort order: + + Map-reduce partition columns: name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + 
condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col1} + outputColumnNames: _col0, _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT _col8) + keys: _col0 (type: string), _col8 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab10k +PREHOOK: Input: default@votertab10k +#### A masked pattern was here #### +POSTHOOK: query: select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k +POSTHOOK: Input: default@votertab10k +#### A masked pattern was here #### +PREHOOK: query: -- smb +create table studenttab10k_smb (name string, age int, gpa double) clustered by (name) sorted by (name) into 2 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@studenttab10k_smb +POSTHOOK: query: -- smb +create table studenttab10k_smb (name string, age int, gpa double) clustered by (name) sorted by (name) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@studenttab10k_smb +PREHOOK: query: create table votertab10k_smb (name string, age int, registration string, contributions float) clustered by (name) sorted by (name) into 2 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@votertab10k_smb +POSTHOOK: query: create table votertab10k_smb (name string, age int, registration string, contributions float) clustered by (name) sorted by (name) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@votertab10k_smb +PREHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +POSTHOOK: query: explain select s.name, 
count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: v + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: name (type: string) + sort order: + + Map-reduce partition columns: name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: registration (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: name (type: string) + sort order: + + Map-reduce partition columns: name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col1} + outputColumnNames: _col0, _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT _col8) + keys: _col0 (type: string), _col8 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab10k_smb +PREHOOK: Input: default@votertab10k_smb +#### A masked pattern was here #### +POSTHOOK: query: select 
s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k_smb +POSTHOOK: Input: default@votertab10k_smb +#### A masked pattern was here #### +PREHOOK: query: load data local inpath '../../data/files/empty1.txt' into table studenttab10k_smb +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@studenttab10k_smb +POSTHOOK: query: load data local inpath '../../data/files/empty1.txt' into table studenttab10k_smb +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@studenttab10k_smb +PREHOOK: query: load data local inpath '../../data/files/empty2.txt' into table studenttab10k_smb +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@studenttab10k_smb +POSTHOOK: query: load data local inpath '../../data/files/empty2.txt' into table studenttab10k_smb +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@studenttab10k_smb +PREHOOK: query: load data local inpath '../../data/files/empty1.txt' into table votertab10k_smb +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@votertab10k_smb +POSTHOOK: query: load data local inpath '../../data/files/empty1.txt' into table votertab10k_smb +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@votertab10k_smb +PREHOOK: query: load data local inpath '../../data/files/empty2.txt' into table votertab10k_smb +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@votertab10k_smb +POSTHOOK: query: load data local inpath '../../data/files/empty2.txt' into table votertab10k_smb +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@votertab10k_smb +PREHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +POSTHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: v + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: name (type: string) + sort order: + + Map-reduce partition columns: name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: registration (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: name (type: string) + sort order: + + Map-reduce partition columns: name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + 
Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col1} + outputColumnNames: _col0, _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT _col8) + keys: _col0 (type: string), _col8 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab10k_smb +PREHOOK: Input: default@votertab10k_smb +#### A masked pattern was here #### +POSTHOOK: query: select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k_smb +POSTHOOK: Input: default@votertab10k_smb +#### A masked pattern was here #### +PREHOOK: query: -- smb + partitions +create table studenttab10k_part (name string, age int, gpa double) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@studenttab10k_part +POSTHOOK: query: -- smb + partitions +create table studenttab10k_part (name string, age int, gpa double) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@studenttab10k_part +PREHOOK: query: create table votertab10k_part (name string, age int, registration string, contributions float) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@votertab10k_part +POSTHOOK: query: create table votertab10k_part (name string, age int, registration string, contributions float) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@votertab10k_part 
+PREHOOK: query: load data local inpath '../../data/files/empty1.txt' into table studenttab10k_part partition (p='foo') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@studenttab10k_part +POSTHOOK: query: load data local inpath '../../data/files/empty1.txt' into table studenttab10k_part partition (p='foo') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@studenttab10k_part +POSTHOOK: Output: default@studenttab10k_part@p=foo +PREHOOK: query: load data local inpath '../../data/files/empty2.txt' into table studenttab10k_part partition (p='foo') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@studenttab10k_part@p=foo +POSTHOOK: query: load data local inpath '../../data/files/empty2.txt' into table studenttab10k_part partition (p='foo') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@studenttab10k_part@p=foo +PREHOOK: query: load data local inpath '../../data/files/empty1.txt' into table votertab10k_part partition (p='foo') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@votertab10k_part +POSTHOOK: query: load data local inpath '../../data/files/empty1.txt' into table votertab10k_part partition (p='foo') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@votertab10k_part +POSTHOOK: Output: default@votertab10k_part@p=foo +PREHOOK: query: load data local inpath '../../data/files/empty2.txt' into table votertab10k_part partition (p='foo') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@votertab10k_part@p=foo +POSTHOOK: query: load data local inpath '../../data/files/empty2.txt' into table votertab10k_part partition (p='foo') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@votertab10k_part@p=foo +PREHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name +PREHOOK: type: QUERY +POSTHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map 4 + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col1} + outputColumnNames: _col0, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col9 (type: string) + outputColumnNames: _col0, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT _col9) + keys: _col0 (type: string), _col9 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data 
size: 0 Basic stats: NONE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab10k_part +PREHOOK: Input: default@votertab10k_part +#### A masked pattern was here #### +POSTHOOK: query: select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k_part +POSTHOOK: Input: default@votertab10k_part +#### A masked pattern was here #### +PREHOOK: query: drop table studenttab10k +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@studenttab10k +PREHOOK: Output: default@studenttab10k +POSTHOOK: query: drop table studenttab10k +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@studenttab10k +POSTHOOK: Output: default@studenttab10k +PREHOOK: query: drop table votertab10k +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@votertab10k +PREHOOK: Output: default@votertab10k +POSTHOOK: query: drop table votertab10k +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@votertab10k +POSTHOOK: Output: default@votertab10k +PREHOOK: query: drop table studenttab10k_smb +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@studenttab10k_smb +PREHOOK: Output: default@studenttab10k_smb +POSTHOOK: query: drop table studenttab10k_smb +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@studenttab10k_smb +POSTHOOK: Output: default@studenttab10k_smb +PREHOOK: query: drop table votertab10k_smb +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@votertab10k_smb +PREHOOK: Output: default@votertab10k_smb +POSTHOOK: query: drop table votertab10k_smb +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@votertab10k_smb +POSTHOOK: Output: default@votertab10k_smb +PREHOOK: query: drop table studenttab10k_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@studenttab10k_part +PREHOOK: Output: default@studenttab10k_part +POSTHOOK: query: drop table studenttab10k_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@studenttab10k_part +POSTHOOK: Output: default@studenttab10k_part +PREHOOK: query: drop table votertab10k_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@votertab10k_part +PREHOOK: Output: default@votertab10k_part +POSTHOOK: query: drop table votertab10k_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@votertab10k_part +POSTHOOK: Output: default@votertab10k_part diff --git ql/src/test/results/clientpositive/spark/auto_join4.q.out 
ql/src/test/results/clientpositive/spark/auto_join4.q.out new file mode 100644 index 0000000..014d3be --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join4.q.out @@ -0,0 +1,172 @@ +PREHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + LEFT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + LEFT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 60 Data 
size: 642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + LEFT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + LEFT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2,dest1.c3,dest1.c4)) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2,dest1.c3,dest1.c4)) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +5079148035 diff --git ql/src/test/results/clientpositive/spark/auto_join5.q.out ql/src/test/results/clientpositive/spark/auto_join5.q.out new file mode 100644 index 0000000..decc677 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join5.q.out @@ -0,0 +1,172 @@ +PREHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: 
Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + RIGHT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + RIGHT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + RIGHT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + RIGHT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2,dest1.c3,dest1.c4)) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2,dest1.c3,dest1.c4)) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +9766083196 diff --git ql/src/test/results/clientpositive/spark/auto_join6.q.out ql/src/test/results/clientpositive/spark/auto_join6.q.out new file mode 100644 index 0000000..8d7c324 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join6.q.out @@ -0,0 +1,172 @@ +PREHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + FULL OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 +PREHOOK: type: 
QUERY +POSTHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + FULL OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 
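(Illustrative aside, not part of the golden file.) The Stats-Aggr stage that closes each of these plans is also the source of the recurring "[Error 30017]" lines in the runtime output further down: stats aggregation is skipped, presumably because the counter-based aggregator (hive.stats.dbclass=counter) relies on MapReduce counters that the Spark engine does not supply here, while the queries themselves still succeed. A hedged sketch of how a local run could sidestep the warning, using a standard Hive setting this patch does not itself touch:

    -- illustrative only: move stats collection off MR counters
    set hive.stats.dbclass=fs;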
+ + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + FULL OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + FULL OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2,dest1.c3,dest1.c4)) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2,dest1.c3,dest1.c4)) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +2607643291 diff --git ql/src/test/results/clientpositive/spark/auto_join7.q.out ql/src/test/results/clientpositive/spark/auto_join7.q.out new file mode 100644 index 0000000..fcb435d --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join7.q.out @@ -0,0 +1,214 @@ +PREHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING, c3 INT, c4 STRING, c5 INT, c6 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING, c3 INT, c4 STRING, c5 INT, c6 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + FULL OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + LEFT OUTER JOIN + ( + FROM src src3 SELECT src3.key AS c5, src3.value AS c6 WHERE src3.key > 20 and src3.key < 25 + ) c + ON (a.c1 = c.c5) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + FULL OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + LEFT OUTER JOIN + ( 
+ FROM src src3 SELECT src3.key AS c5, src3.value AS c6 WHERE src3.key > 20 and src3.key < 25 + ) c + ON (a.c1 = c.c5) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 20) and (key < 25)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + Left Outer Join0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string), UDFToInteger(_col4) (type: int), _col5 (type: string) + outputColumnNames: 
_col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + FULL OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + LEFT OUTER JOIN + ( + FROM src src3 SELECT src3.key AS c5, src3.value AS c6 WHERE src3.key > 20 and src3.key < 25 + ) c + ON (a.c1 = c.c5) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + FULL OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + LEFT OUTER JOIN + ( + FROM src src3 SELECT src3.key AS c5, src3.value AS c6 WHERE src3.key > 20 and src3.key < 25 + ) c + ON (a.c1 = c.c5) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src3.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c6 SIMPLE [(src)src3.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2,dest1.c3,dest1.c4,dest1.c5,dest1.c6)) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2,dest1.c3,dest1.c4,dest1.c5,dest1.c6)) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +-2315698213 diff --git ql/src/test/results/clientpositive/spark/auto_join8.q.out ql/src/test/results/clientpositive/spark/auto_join8.q.out new file mode 100644 index 0000000..0ef5e16 --- /dev/null +++ 
ql/src/test/results/clientpositive/spark/auto_join8.q.out @@ -0,0 +1,175 @@ +PREHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING, c3 INT, c4 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + LEFT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + LEFT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 
Data size: 326 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col2 is null (type: boolean) + Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(null) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + LEFT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2 WHERE src1.key > 10 and src1.key < 20 + ) a + LEFT OUTER JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src2.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2,dest1.c3,dest1.c4)) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2,dest1.c3,dest1.c4)) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +-7158439905 diff --git ql/src/test/results/clientpositive/spark/auto_join9.q.out ql/src/test/results/clientpositive/spark/auto_join9.q.out new file mode 100644 index 0000000..2c4f277 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join9.q.out @@ -0,0 +1,121 @@ +PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: 
CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: explain +FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col8 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart 
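(Illustrative aside, not part of the golden file.) Note how the input list that continues below names only a single srcpart partition: ds and hr are partition columns, so the predicate src1.ds = '2008-04-08' and src1.hr = '12' is consumed at compile time as partition pruning and never appears in a Filter Operator — the map side filters only on "key is not null". A minimal sketch of the same effect, assuming the standard srcpart layout used by these tests:

    -- the partition predicate selects directories to scan; it is not a row filter
    SELECT count(*) FROM srcpart WHERE ds = '2008-04-08' AND hr = '12';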
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@dest1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +101861029915 diff --git ql/src/test/results/clientpositive/spark/auto_join_filters.q.out ql/src/test/results/clientpositive/spark/auto_join_filters.q.out new file mode 100644 index 0000000..dd15363 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join_filters.q.out @@ -0,0 +1,532 @@ +PREHOOK: query: CREATE TABLE myinput1(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@myinput1 +POSTHOOK: query: CREATE TABLE myinput1(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@myinput1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@myinput1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@myinput1 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4937935 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value 
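(Illustrative aside, not part of the golden file.) In the auto_join_filters queries below, the ON clause carries filter predicates rather than, or in addition to, equi-join keys, and for outer joins that distinction matters: an ON-clause predicate only decides which rows match, while preserved-side rows survive padded with NULLs; the same predicate in a WHERE clause would drop them. A hedged contrast on the same table:

    -- ON-clause filter: every row of a survives, unmatched ones padded with NULLs
    SELECT a.key, b.key
    FROM myinput1 a LEFT OUTER JOIN myinput1 b
    ON a.key = b.key AND b.value > 50;

    -- WHERE filter: rows of a without a qualifying match are dropped
    SELECT a.key, b.key
    FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key
    WHERE b.value > 50;

This is why the LEFT, RIGHT, and FULL OUTER variants in this file settle on different golden checksums (4937935, 3080335, 4939870) than the inner join (3078400).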
+PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3080335 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 
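(Illustrative aside, not part of the golden file.) Each masked block below is followed by a single number: the golden value of sum(hash(...)) over the whole result set. Summing per-row hashes is order-independent, so these checksums stay stable even if Spark emits rows in a different order than MapReduce would. The idiom, mirroring the queries in this file:

    -- one order-insensitive checksum for an entire join result
    SELECT sum(hash(a.key, a.value, b.key, b.value))
    FROM myinput1 a JOIN myinput1 b ON a.key = b.key;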
+#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4937935 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4937935 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4937935 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4937935 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3080335 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 
AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3080335 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3080335 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3080335 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN 
myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3080335 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 
AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3080335 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: CREATE TABLE smb_input1(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_input1 +POSTHOOK: query: CREATE TABLE smb_input1(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_input1 +PREHOOK: query: CREATE TABLE smb_input2(key int, value int) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default 
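(Illustrative aside, not part of the golden file.) The smb_input1 and smb_input2 tables being created here — the record continues below — are bucketed and sorted (CLUSTERED BY ... SORTED BY ... INTO 2 BUCKETS) so the test can exercise sort-merge-bucket joins. The golden file records output only; a hedged sketch of the standard Hive switches such tests typically set before the SMB queries:

    -- standard settings that let bucketed, sorted tables use SMB joins
    set hive.optimize.bucketmapjoin=true;
    set hive.optimize.bucketmapjoin.sortedmerge=true;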
+PREHOOK: Output: default@smb_input2 +POSTHOOK: query: CREATE TABLE smb_input2(key int, value int) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_input2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' into table smb_input1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_input1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' into table smb_input1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_input1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in2.txt' into table smb_input1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_input1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in2.txt' into table smb_input1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_input1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' into table smb_input2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_input2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' into table smb_input2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_input2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in2.txt' into table smb_input2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_input2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in2.txt' into table smb_input2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_input2 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here 
#### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT 
sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 
AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = 
b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 diff --git ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out new file mode 100644 index 0000000..939e5ab --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out @@ -0,0 +1,214 @@ +PREHOOK: query: CREATE TABLE myinput1(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@myinput1 +POSTHOOK: query: CREATE TABLE myinput1(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@myinput1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@myinput1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@myinput1 +PREHOOK: query: SELECT 
sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +13630578 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +13630578 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +13630578 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4509856 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3112070 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3078400 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4542003 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: 
default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4542038 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4543491 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4542003 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3079923 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4509891 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3113558 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3079923 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4543526 +PREHOOK: query: SELECT 
sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4543526 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4543526 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4543526 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3112070 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3113558 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +3112070 diff --git ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out new file mode 100644 index 0000000..b9f4666 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out @@ -0,0 +1,645 @@ +PREHOOK: query: -- HIVE-5056 RS has expression list for values, but it's ignored in MapJoinProcessor + +create table testsrc ( `key` int,`val` string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: 
Output: default@testsrc +POSTHOOK: query: -- HIVE-5056 RS has expression list for values, but it's ignored in MapJoinProcessor + +create table testsrc ( `key` int,`val` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testsrc +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table testsrc +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@testsrc +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table testsrc +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@testsrc +PREHOOK: query: drop table if exists orderpayment_small +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists orderpayment_small +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table orderpayment_small (`dealid` int,`date` string,`time` string, `cityid` int, `userid` int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orderpayment_small +POSTHOOK: query: create table orderpayment_small (`dealid` int,`date` string,`time` string, `cityid` int, `userid` int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orderpayment_small +PREHOOK: query: insert overwrite table orderpayment_small select 748, '2011-03-24', '2011-03-24', 55 ,5372613 from testsrc tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@testsrc +PREHOOK: Output: default@orderpayment_small +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table orderpayment_small select 748, '2011-03-24', '2011-03-24', 55 ,5372613 from testsrc tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testsrc +POSTHOOK: Output: default@orderpayment_small +POSTHOOK: Lineage: orderpayment_small.cityid SIMPLE [] +POSTHOOK: Lineage: orderpayment_small.date SIMPLE [] +POSTHOOK: Lineage: orderpayment_small.dealid SIMPLE [] +POSTHOOK: Lineage: orderpayment_small.time SIMPLE [] +POSTHOOK: Lineage: orderpayment_small.userid SIMPLE [] +PREHOOK: query: drop table if exists user_small +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists user_small +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table user_small( userid int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@user_small +POSTHOOK: query: create table user_small( userid int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@user_small +PREHOOK: query: insert overwrite table user_small select key from testsrc tablesample (100 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@testsrc +PREHOOK: Output: default@user_small +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table user_small select key from testsrc tablesample (100 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testsrc +POSTHOOK: Output: default@user_small +POSTHOOK: Lineage: user_small.userid SIMPLE [(testsrc)testsrc.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: explain extended SELECT + `dim_pay_date`.`date` + , `deal`.`dealid` +FROM `orderpayment_small` `orderpayment` +JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` 
= `orderpayment`.`date` +JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid` +JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid` +JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid` +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended SELECT + `dim_pay_date`.`date` + , `deal`.`dealid` +FROM `orderpayment_small` `orderpayment` +JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date` +JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid` +JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid` +JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid` +limit 5 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_JOIN + TOK_JOIN + TOK_JOIN + TOK_TABREF + TOK_TABNAME + orderpayment_small + orderpayment + TOK_TABREF + TOK_TABNAME + orderpayment_small + dim_pay_date + = + . + TOK_TABLE_OR_COL + dim_pay_date + date + . + TOK_TABLE_OR_COL + orderpayment + date + TOK_TABREF + TOK_TABNAME + orderpayment_small + deal + = + . + TOK_TABLE_OR_COL + deal + dealid + . + TOK_TABLE_OR_COL + orderpayment + dealid + TOK_TABREF + TOK_TABNAME + orderpayment_small + order_city + = + . + TOK_TABLE_OR_COL + order_city + cityid + . + TOK_TABLE_OR_COL + orderpayment + cityid + TOK_TABREF + TOK_TABNAME + user_small + user + = + . + TOK_TABLE_OR_COL + user + userid + . + TOK_TABLE_OR_COL + orderpayment + userid + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + dim_pay_date + date + TOK_SELEXPR + . + TOK_TABLE_OR_COL + deal + dealid + TOK_LIMIT + 5 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 8 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 1), Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 9 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dim_pay_date + Statistics: Num rows: 0 Data size: 37 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: date is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: date (type: string) + sort order: + + Map-reduce partition columns: date (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: orderpayment_small + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns dealid,date,time,cityid,userid + columns.comments + columns.types int:string:string:int:int +#### A masked pattern was here #### + name default.orderpayment_small + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 37 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns dealid,date,time,cityid,userid + columns.comments + columns.types int:string:string:int:int +#### A masked pattern was here #### + name default.orderpayment_small + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 37 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.orderpayment_small + name: default.orderpayment_small + Truncated Path -> Alias: + /orderpayment_small [dim_pay_date] + Map 6 + Map Operator Tree: + TableScan + alias: deal + Statistics: Num rows: 9 Data size: 37 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: dealid is not null (type: boolean) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: dealid (type: int) + sort order: + + Map-reduce partition columns: dealid (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: orderpayment_small + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns dealid,date,time,cityid,userid + columns.comments + columns.types int:string:string:int:int +#### A masked pattern was here #### + name default.orderpayment_small + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 37 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns dealid,date,time,cityid,userid + columns.comments + columns.types int:string:string:int:int +#### A masked pattern was here #### + name default.orderpayment_small + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 37 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.orderpayment_small + name: default.orderpayment_small + Truncated Path -> Alias: + /orderpayment_small [deal] + Map 7 + Map Operator Tree: + TableScan + alias: order_city + Statistics: Num rows: 9 Data size: 37 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + 
Filter Operator + isSamplingPred: false + predicate: cityid is not null (type: boolean) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: cityid (type: int) + sort order: + + Map-reduce partition columns: cityid (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: orderpayment_small + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns dealid,date,time,cityid,userid + columns.comments + columns.types int:string:string:int:int +#### A masked pattern was here #### + name default.orderpayment_small + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 37 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns dealid,date,time,cityid,userid + columns.comments + columns.types int:string:string:int:int +#### A masked pattern was here #### + name default.orderpayment_small + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 37 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.orderpayment_small + name: default.orderpayment_small + Truncated Path -> Alias: + /orderpayment_small [order_city] + Map 8 + Map Operator Tree: + TableScan + alias: orderpayment + Statistics: Num rows: 0 Data size: 37 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (((date is not null and dealid is not null) and cityid is not null) and userid is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: date (type: string) + sort order: + + Map-reduce partition columns: date (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + tag: 0 + value expressions: dealid (type: int), cityid (type: int), userid (type: int) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: orderpayment_small + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns dealid,date,time,cityid,userid + columns.comments + columns.types int:string:string:int:int +#### A masked pattern was here #### + name default.orderpayment_small + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 
cityid, i32 userid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 37 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns dealid,date,time,cityid,userid + columns.comments + columns.types int:string:string:int:int +#### A masked pattern was here #### + name default.orderpayment_small + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 37 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.orderpayment_small + name: default.orderpayment_small + Truncated Path -> Alias: + /orderpayment_small [orderpayment] + Map 9 + Map Operator Tree: + TableScan + alias: user + Statistics: Num rows: 97 Data size: 388 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: userid is not null (type: boolean) + Statistics: Num rows: 49 Data size: 196 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: userid (type: int) + sort order: + + Map-reduce partition columns: userid (type: int) + Statistics: Num rows: 49 Data size: 196 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: user_small + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns userid + columns.comments + columns.types int +#### A masked pattern was here #### + name default.user_small + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct user_small { i32 userid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 388 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns userid + columns.comments + columns.types int +#### A masked pattern was here #### + name default.user_small + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct user_small { i32 userid} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 388 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.user_small + name: default.user_small + Truncated Path -> Alias: + /user_small [user] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col2} {VALUE._col3} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col3, _col4, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) 
+ sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + tag: 0 + value expressions: _col3 (type: int), _col4 (type: int), _col9 (type: string) + auto parallelism: true + Reducer 3 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col2} {VALUE._col3} {VALUE._col8} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col3, _col4, _col9, _col16 + Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col4 (type: int), _col9 (type: string), _col16 (type: int) + auto parallelism: true + Reducer 4 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col3} {VALUE._col8} {VALUE._col15} + 1 + outputColumnNames: _col4, _col9, _col16 + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col9 (type: string), _col16 (type: int) + auto parallelism: true + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col8} {VALUE._col15} + 1 + outputColumnNames: _col9, _col16 + Statistics: Num rows: 53 Data size: 215 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col9 (type: string), _col16 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 53 Data size: 215 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + `dim_pay_date`.`date` + , `deal`.`dealid` +FROM `orderpayment_small` `orderpayment` +JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date` +JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid` +JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid` +JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid` +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@orderpayment_small +PREHOOK: Input: default@user_small +#### A masked pattern was here #### +POSTHOOK: query: SELECT + 
`dim_pay_date`.`date` + , `deal`.`dealid` +FROM `orderpayment_small` `orderpayment` +JOIN `orderpayment_small` `dim_pay_date` ON `dim_pay_date`.`date` = `orderpayment`.`date` +JOIN `orderpayment_small` `deal` ON `deal`.`dealid` = `orderpayment`.`dealid` +JOIN `orderpayment_small` `order_city` ON `order_city`.`cityid` = `orderpayment`.`cityid` +JOIN `user_small` `user` ON `user`.`userid` = `orderpayment`.`userid` +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orderpayment_small +POSTHOOK: Input: default@user_small +#### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out new file mode 100644 index 0000000..f608cc5 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out @@ -0,0 +1,358 @@ +PREHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl1 +POSTHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl1 +PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl2 +POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl2 +PREHOOK: query: insert overwrite table tbl1 +select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tbl1 +select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl1 +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tbl2 +select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tbl2 +select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl2 +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- One of the subqueries contains a union, so it should not be converted to a sort-merge join. 
+explain +select count(*) from + ( + select * from + (select a.key as key, a.value as value from tbl1 a where key < 6 + union all + select a.key as key, a.value as value from tbl1 a where key < 6 + ) usubq1 ) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- One of the subqueries contains a union, so it should not be converted to a sort-merge join. +explain +select count(*) from + ( + select * from + (select a.key as key, a.value as value from tbl1 a where key < 6 + union all + select a.key as key, a.value as value from tbl1 a where key < 6 + ) usubq1 ) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Union 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + 
outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + ( + select * from + (select a.key as key, a.value as value from tbl1 a where key < 6 + union all + select a.key as key, a.value as value from tbl1 a where key < 6 + ) usubq1 ) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + ( + select * from + (select a.key as key, a.value as value from tbl1 a where key < 6 + union all + select a.key as key, a.value as value from tbl1 a where key < 6 + ) usubq1 ) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +40 +PREHOOK: query: -- One of the subqueries contains a groupby, so it should not be converted to a sort-merge join. +explain +select count(*) from + (select a.key as key, count(*) as value from tbl1 a where key < 6 group by a.key) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- One of the subqueries contains a groupby, so it should not be converted to a sort-merge join. 
+explain +select count(*) from + (select a.key as key, count(*) as value from tbl1 a where key < 6 group by a.key) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + 
outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key as key, count(*) as value from tbl1 a where key < 6 group by a.key) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key as key, count(*) as value from tbl1 a where key < 6 group by a.key) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +8 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_11.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_11.q.out new file mode 100644 index 0000000..3c26363 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_11.q.out @@ -0,0 +1,1515 @@ +PREHOOK: query: -- small 1 part, 2 bucket & big 2 part, 4 bucket + +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_small +POSTHOOK: query: -- small 1 part, 2 bucket & big 2 part, 4 bucket + +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_small +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default 
+PREHOOK: Output: default@bucket_big +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath 
'../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + 
columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +38 +PREHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +-- The tables are only bucketed and not sorted, the join should not be converted +-- Currently, a join is only converted to a sort-merge join without a hint, automatic conversion to +-- bucketized mapjoin is not done +explain extended
select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +-- The tables are only bucketed and not sorted, the join should not be converted +-- Currently, a join is only converted to a sort-merge join without a hint, automatic conversion to +-- bucketized mapjoin is not done +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +38 +PREHOOK: query: -- The join is converted to a bucketed mapjoin with a mapjoin hint +explain extended select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The join is converted to a bucketed mapjoin with a mapjoin hint +explain extended select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input 
format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +38 +PREHOOK: query: -- HIVE-7023 +explain extended select /* + MAPJOIN(a,b) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key JOIN bucket_big c ON a.key = c.key +PREHOOK: type: QUERY +POSTHOOK: query: -- HIVE-7023 +explain extended select /* + MAPJOIN(a,b) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key JOIN bucket_big c ON a.key = c.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_TABREF + TOK_TABNAME + bucket_big + c + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + c + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 2 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + 
partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [c] + /bucket_big/ds=2008-04-09 [c] + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 + 1 + 2 + Statistics: Num rows: 127 Data 
size: 12786 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /* + MAPJOIN(a,b) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key JOIN bucket_big c ON a.key = c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select /* + MAPJOIN(a,b) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key JOIN bucket_big c ON a.key = c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +180 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out new file mode 100644 index 0000000..65e496f --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out @@ -0,0 +1,641 @@ +PREHOOK: query: -- small 1 part, 2 bucket & big 2 part, 4 bucket + +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_small +POSTHOOK: query: -- small 1 part, 2 bucket & big 2 part, 4 bucket + +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_small +PREHOOK: query: 
load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_big +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data 
local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: CREATE TABLE bucket_medium (key string, value string) partitioned by (ds string) +CLUSTERED BY (key) SORTED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_medium +POSTHOOK: query: CREATE TABLE bucket_medium (key string, value string) partitioned by (ds string) +CLUSTERED BY (key) SORTED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_medium +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_medium partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_medium +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_medium partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_medium +POSTHOOK: Output: default@bucket_medium@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_medium partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_medium@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_medium partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_medium@ds=2008-04-08 +PREHOOK: query: 
load data local inpath '../../data/files/smallsrcsortbucket3outof4.txt' INTO TABLE bucket_medium partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_medium@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3outof4.txt' INTO TABLE bucket_medium partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_medium@ds=2008-04-08 +PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_JOIN + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_medium + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_TABREF + TOK_TABNAME + bucket_big + c + = + . + TOK_TABLE_OR_COL + c + key + . + TOK_TABLE_OR_COL + b + key + TOK_TABREF + TOK_TABNAME + bucket_medium + d + = + . + TOK_TABLE_OR_COL + c + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 170 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 170 Basic stats: PARTIAL Column stats: NONE + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + numFiles 3 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 170 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + partition_columns ds + 
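
The golden file above captures EXPLAIN EXTENDED output: the ABSTRACT SYNTAX TREE echoes the parsed query, STAGE DEPENDENCIES orders the work, and the Spark stage lists shuffle edges such as "Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)". Note the bucket counts involved: bucket_small, bucket_medium and bucket_big use 2, 3 and 4 buckets respectively, so the tables cannot be aligned bucket-for-bucket (3 is neither a multiple nor a divisor of 2 or 4), which is consistent with the plan showing plain reduce-side Join Operators rather than a sort-merge-bucket join. A minimal sketch of how such a plan is produced (the engine setting is an assumption about the test harness, not part of the captured output):

    SET hive.execution.engine=spark;
    EXPLAIN EXTENDED
    SELECT count(*)
    FROM bucket_small a
    JOIN bucket_medium b ON a.key = b.key
    JOIN bucket_big c    ON c.key = b.key
    JOIN bucket_medium d ON c.key = b.key;
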
partition_columns.types string + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_medium + name: default.bucket_medium + Truncated Path -> Alias: + /bucket_medium/ds=2008-04-08 [d] + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + numFiles 3 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 170 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_medium + name: default.bucket_medium + Truncated Path -> Alias: + /bucket_medium/ds=2008-04-08 [b] + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 2 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
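
The "predicate: key is not null" Filter Operator under each TableScan is not written in the user's query; for an inner equi-join Hive derives it from the join condition, since rows with a NULL key can never match. A hedged sketch of the equivalence (an illustrative rewrite, not the optimizer's actual output):

    SELECT count(*)
    FROM bucket_medium b JOIN bucket_big c ON b.key = c.key;

    -- behaves like the explicit form the Filter Operators encode:
    SELECT count(*)
    FROM (SELECT * FROM bucket_medium WHERE key IS NOT NULL) b
    JOIN (SELECT * FROM bucket_big    WHERE key IS NOT NULL) c
      ON b.key = c.key;
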
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [c] + /bucket_big/ds=2008-04-09 [c] + Map 7 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic 
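
Both bucket_big partitions (ds=2008-04-08 and ds=2008-04-09) appear under Path -> Partition and in Truncated Path -> Alias because the query carries no predicate on the partition column, so nothing is pruned. A sketch of the contrast (illustrative only; the filtered form is not part of this test):

    -- scans both partitions, as in the plan above:
    SELECT count(*) FROM bucket_big;
    -- partition pruning would keep a single path in the plan:
    SELECT count(*) FROM bucket_big WHERE ds = '2008-04-08';
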
stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 139 Data size: 14064 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 139 Data size: 14064 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + 
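
The two reducers above show count(*) evaluated in two phases: Reducer 2 emits a partial count ("mode: hash") and Reducer 3 merges the partials ("mode: mergepartial"). Written out by hand, the same decomposition looks roughly like this (purely illustrative; Hive performs the split automatically):

    SELECT sum(cnt)                          -- the "mergepartial" phase
    FROM (SELECT count(*) AS cnt             -- the "hash" (partial) phase
          FROM bucket_small a JOIN bucket_medium b ON a.key = b.key) t;
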
GatherStats: false + MultiFileSpray: false + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 + 1 + 2 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_medium +PREHOOK: Input: default@bucket_medium@ds=2008-04-08 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_medium +POSTHOOK: Input: default@bucket_medium@ds=2008-04-08 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +570 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out new file mode 100644 index 0000000..a5a281b --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out @@ -0,0 +1,709 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl1 +PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl2 +POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl2 +PREHOOK: query: insert overwrite table tbl1 select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tbl1 select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl1 +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tbl2 
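
The "[Error 30017]: Skipping stats aggregation ... Stats aggregator of type counter cannot be connected to" lines are warnings, not failures: the counter-based stats aggregator apparently was not yet wired up for the Spark engine when these golden files were generated, so table statistics are skipped while the data itself is written, as the POSTHOOK Lineage lines confirm. A sketch of the setting the message refers to (the value is an assumption about the test configuration, which is not echoed into this output):

    SET hive.stats.dbclass=counter;   -- the aggregator type named in the warning
    INSERT OVERWRITE TABLE tbl1 SELECT * FROM src WHERE key < 10;
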
select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tbl2 select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl2 +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE dest1(k1 int, k2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(k1 int, k2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: CREATE TABLE dest2(k1 string, k2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest2 +POSTHOOK: query: CREATE TABLE dest2(k1 string, k2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest2 +PREHOOK: query: -- A SMB join followed by a mutli-insert +explain +from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +PREHOOK: type: QUERY +POSTHOOK: query: -- A SMB join followed by a mutli-insert +explain +from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 
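
tbl1 and tbl2 are bucketed and sorted on the join key, which is the precondition for a sort-merge-bucket (SMB) join; the captured plan nevertheless shows a reduce-side Join Operator in Reducer 2, so the conversion evidently did not apply on this engine at the time. The knobs that usually govern the conversion are sketched below (illustrative values; the .q file's actual settings are not part of this output):

    SET hive.enforce.bucketing=true;
    SET hive.enforce.sorting=true;
    SET hive.auto.convert.sortmerge.join=true;
    SET hive.optimize.bucketmapjoin=true;
    SET hive.optimize.bucketmapjoin.sortedmerge=true;
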
outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.k1 SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.k2 SIMPLE [(tbl2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.k1 SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.k2 
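
This is Hive's multi-insert form: the FROM subquery is evaluated once, and the single Reducer 2 fans out to two File Output Operators, one per destination, with separate Move and Stats-Aggr stages for each table (Stage-0/Stage-4 for dest1, Stage-1/Stage-5 for dest2). The general shape, echoing the test query:

    FROM (SELECT a.key key1, a.value value1, b.key key2, b.value value2
          FROM tbl1 a JOIN tbl2 b ON a.key = b.key) subq
    INSERT OVERWRITE TABLE dest1 SELECT key1, key2
    INSERT OVERWRITE TABLE dest2 SELECT value1, value2;
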
SIMPLE [(tbl2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +2 2 +4 4 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +8 8 +9 9 +PREHOOK: query: select * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_2 val_2 +val_4 val_4 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_8 val_8 +val_9 val_9 +PREHOOK: query: -- A SMB join followed by a mutli-insert +explain +from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +PREHOOK: type: QUERY +POSTHOOK: query: -- A SMB join followed by a mutli-insert +explain +from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num 
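
The duplicate rows in dest1 follow from duplicates in src: key 0 and key 5 each occur three times among the rows with key < 10, so the equi-join yields 3 x 3 = 9 rows for each, while keys 2, 4, 8 and 9 occur once and contribute one row apiece, for 9 + 9 + 4 = 22 rows in total. The "-- SORT_QUERY_RESULTS" directive echoed at the top of this file asks the test driver to sort query output before diffing, so the listing is order-insensitive. An illustrative check of the multiplicity (not part of the test):

    SELECT key, count(*) FROM src WHERE key < 10 GROUP BY key;
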
rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.k1 SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.k2 SIMPLE [(tbl2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.k1 SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.k2 SIMPLE [(tbl2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked 
pattern was here #### +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +2 2 +4 4 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +8 8 +9 9 +PREHOOK: query: select * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_2 val_2 +val_4 val_4 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_8 val_8 +val_9 val_9 +PREHOOK: query: -- A SMB join followed by a mutli-insert +explain +from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +PREHOOK: type: QUERY +POSTHOOK: query: -- A SMB join followed by a mutli-insert +explain +from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 
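
The same EXPLAIN and multi-insert block appears three times in auto_sortmerge_join_13.q.out; the usual qfile pattern is to re-run one query under different settings, with each run's output captured in sequence (the settings themselves are not echoed into the golden file, so the specific property below is an assumption). A sketch of that pattern:

    SET hive.auto.convert.sortmerge.join.to.mapjoin=false;
    EXPLAIN
    FROM (SELECT a.key key1, b.key key2 FROM tbl1 a JOIN tbl2 b ON a.key = b.key) subq
    INSERT OVERWRITE TABLE dest1 SELECT key1, key2;

    SET hive.auto.convert.sortmerge.join.to.mapjoin=true;
    EXPLAIN
    FROM (SELECT a.key key1, b.key key2 FROM tbl1 a JOIN tbl2 b ON a.key = b.key) subq
    INSERT OVERWRITE TABLE dest1 SELECT key1, key2;
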
Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: from ( + SELECT a.key key1, a.value value1, b.key key2, b.value value2 + FROM tbl1 a JOIN tbl2 b + ON a.key = b.key ) subq +INSERT OVERWRITE TABLE dest1 select key1, key2 +INSERT OVERWRITE TABLE dest2 select value1, value2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.k1 SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.k2 SIMPLE [(tbl2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.k1 SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.k2 SIMPLE [(tbl2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +2 2 +4 4 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +5 5 +8 8 +9 9 +PREHOOK: query: select * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest2 +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_0 val_0 +val_2 val_2 +val_4 val_4 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_5 val_5 +val_8 val_8 +val_9 val_9 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out new file mode 100644 index 0000000..2fc3bb6 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out @@ -0,0 +1,241 @@ +PREHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl1 +POSTHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl1 +PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl2 +POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl2 +PREHOOK: query: insert overwrite table tbl1 select * from src where key < 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tbl1 select * from src where key < 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl1 +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tbl2 select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tbl2 select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl2 +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Since tbl1 is the bigger table, tbl1 Left Outer Join tbl2 can be performed +explain +select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Since tbl1 is the bigger table, tbl1 Left Outer Join tbl2 can be performed +explain +select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) 
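
The comment in auto_sortmerge_join_14, "Since tbl1 is the bigger table, tbl1 Left Outer Join tbl2 can be performed", reflects a constraint on converting outer joins to map-side joins: the preserved side (the left table of a LEFT OUTER JOIN) must be the streamed big table, because unmatched rows can only be emitted from the streamed side. Here tbl1 holds src keys < 20 and tbl2 keys < 10, so tbl1 is the larger, preserved side, and the captured result is 32 rows. The shape being tested:

    EXPLAIN
    SELECT count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key;
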
+#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 49 Data size: 198 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 49 Data size: 198 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +32 +PREHOOK: query: insert overwrite table tbl2 select * from src where key < 200 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tbl2 select * from src where key < 200 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl2 +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Since tbl2 is the bigger table, tbl1 Right Outer Join tbl2 can be performed +explain +select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Since tbl2 is the bigger table, tbl1 Right Outer Join tbl2 
can be performed +explain +select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 520 Data size: 2080 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 520 Data size: 2080 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 572 Data size: 2288 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 572 Data size: 2288 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +207 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out new file mode 100644 index 0000000..74cbd7c --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out @@ -0,0 +1,204 @@ +PREHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl1 +POSTHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY 
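
After tbl2 is reloaded with src keys < 200 it becomes the larger side, and the mirrored comment applies for the same reason: a RIGHT OUTER JOIN preserves the right table, so tbl2 must be the big, streamed input for a map-side conversion to be legal. The captured result for this run is 207 rows. The mirrored shape:

    EXPLAIN
    SELECT count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key;
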
(key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl1 +PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl2 +POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl2 +PREHOOK: query: insert overwrite table tbl1 select * from src where key < 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tbl1 select * from src where key < 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl1 +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tbl2 select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tbl2 select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl2 +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain +select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 49 Data size: 198 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 49 Data size: 198 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: 
_col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 49 Data size: 198 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 49 Data size: 198 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out new file mode 100644 index 0000000..d1bb7a0 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out @@ -0,0 +1,723 @@ +PREHOOK: query: -- small 1 part, 4 bucket & big 2 part, 2 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_small +POSTHOOK: query: -- small 1 part, 4 bucket & big 2 part, 2 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_small +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket4outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket4outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_big +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: 
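
auto_sortmerge_join_2 opens with "small 1 part, 4 bucket & big 2 part, 2 bucket". These counts satisfy the usual bucket-map-join condition that one table's bucket count be a multiple of the other's: each of bucket_big's 2 buckets lines up with exactly 2 of bucket_small's 4 buckets (2 x 2 = 4). The declarations involved, echoing the test's DDL:

    CREATE TABLE bucket_small (key string, value string) PARTITIONED BY (ds string)
      CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
    CREATE TABLE bucket_big (key string, value string) PARTITIONED BY (ds string)
      CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
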
type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: -- Since the leftmost table is assumed as the big table, arrange the tables in the join accordingly +explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Since the leftmost table is assumed as the big table, arrange the tables in the join accordingly +explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +38 +PREHOOK: query: -- The mapjoin should fail resulting in the sort-merge join +explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The mapjoin should fail resulting in the sort-merge join +explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +38 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out new file mode 100644 index 0000000..d57a1d7 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out @@ -0,0 +1,1038 @@ +PREHOOK: query: -- small 2 part, 2 bucket & big 1 part, 4 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_small +POSTHOOK: query: -- small 2 part, 2 bucket & big 1 part, 4 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_small +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: 
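
For orientation: the auto_sortmerge_join_*.q.out files added above and below all share one fixture pattern, distilled here as a minimal sketch. Both tables are bucketed and sorted on the join key; only the partition and bucket counts vary between tests (auto_sortmerge_join_2 pairs a 4-bucket small table with a 2-bucket big one). The `set` commands that enable the join conversion live in the .q scripts, which are not part of this diff, so the property names in the trailing comment are an assumption about what those scripts toggle.

-- Minimal sketch of the shared fixture (table names, columns, and data
-- files are taken from the statements recorded above):
CREATE TABLE bucket_small (key string, value string)
  PARTITIONED BY (ds string)
  CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS
  STORED AS TEXTFILE;

CREATE TABLE bucket_big (key string, value string)
  PARTITIONED BY (ds string)
  CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
  STORED AS TEXTFILE;

-- One LOAD per bucket file and partition, exactly as recorded:
LOAD DATA LOCAL INPATH '../../data/files/smallsrcsortbucket1outof4.txt'
  INTO TABLE bucket_small PARTITION (ds='2008-04-08');
-- ...and so on for the remaining bucket files and partitions.

-- Assumed, not shown in this diff: the .q scripts set properties such as
-- hive.auto.convert.sortmerge.join and
-- hive.optimize.bucketmapjoin.sortedmerge before running the EXPLAINs.

The 4-versus-2 bucket counts are deliberate: for the sort-merge conversion the two sides' bucket counts generally need to be equal or integer multiples of each other, with both sides sorted on the join key, which is exactly the layout these fixtures set up.
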
default@bucket_small +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small +POSTHOOK: Output: default@bucket_small@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-09 +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_big +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A 
masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + 
partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + /bucket_small/ds=2008-04-09 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +PREHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +POSTHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +38 +PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain 
extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct 
bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE 
Column stats: NONE + Select Operator + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +PREHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +POSTHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +38 +PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +PREHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +POSTHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +38 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out new file mode 100644 index 0000000..8244c50 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out @@ -0,0 +1,1054 @@ +PREHOOK: query: -- small 2 part, 4 bucket & big 1 part, 2 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_small +POSTHOOK: query: -- small 2 part, 4 bucket & big 1 part, 2 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_small +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### 
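
A note on reading the Spark plans recorded in these golden files: the edge line `Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)` says both table scans shuffle into a single reducer, partition-level sorted on the join key, and `Needs Tagging: true` together with the per-side `tag: 0`/`tag: 1` markers shows the join executing as a tagged reduce-side join in the Spark work rather than a map-side bucket join. Each variant is then verified by the same aggregate, whose golden result is 38:

-- Verification query used throughout these fixtures; the recorded
-- result is 38 for every table ordering shown above.
SELECT count(*)
FROM bucket_big a
JOIN bucket_small b ON a.key = b.key;
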
+POSTHOOK: Output: default@bucket_small +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket4outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket4outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small +POSTHOOK: Output: default@bucket_small@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket4outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket4outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +POSTHOOK: 
type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-09 +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_big +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + /bucket_small/ds=2008-04-09 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +PREHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +POSTHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +38 +PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +PREHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +POSTHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +38 +PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +PREHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +POSTHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +38 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out new file mode 100644 index 0000000..2ab1bca --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out @@ -0,0 +1,830 @@ +PREHOOK: query: -- small no part, 4 bucket & big no part, 2 bucket +CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_small +POSTHOOK: query: -- small no part, 4 bucket & big no part, 2 bucket +CREATE TABLE bucket_small (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_small +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small +PREHOOK: query: load data local inpath 
'../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3outof4.txt' INTO TABLE bucket_small +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3outof4.txt' INTO TABLE bucket_small +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket4outof4.txt' INTO TABLE bucket_small +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket4outof4.txt' INTO TABLE bucket_small +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_big +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +PREHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: bucket_big + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: bucket_small + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + 
bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_small +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_small +#### A masked 
pattern was here #### +19 +PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: bucket_small + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + 
Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: bucket_big + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + 
limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_small +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_small +#### A masked pattern was here #### +19 +PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: bucket_small + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + 
/bucket_small [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: bucket_big + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_small +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_small +#### A masked pattern was here #### +19 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out new file mode 100644 index 0000000..bc4a163 --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out @@ -0,0 +1,1356 @@ +PREHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl1 +POSTHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl1 +PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl2 +POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl2 +PREHOOK: query: CREATE TABLE tbl3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl3 +POSTHOOK: query: CREATE TABLE tbl3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl3 +PREHOOK: query: CREATE TABLE tbl4(key int, value string) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl4 +POSTHOOK: query: CREATE TABLE tbl4(key int, value string) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl4 +PREHOOK: query: insert overwrite table tbl1 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tbl1 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl1 +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tbl2 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tbl2 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl2 +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tbl3 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl3 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tbl3 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl3 +POSTHOOK: Lineage: tbl3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tbl4 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl4 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tbl4 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl4 +POSTHOOK: Lineage: tbl4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- A SMB join is being followed by a regular join on a non-bucketed table on a different key + +-- Three tests below are all the same query with different alias, which changes dispatch order of GenMapRedWalker +-- This is dependent to iteration order of HashMap, so can be meaningless in non-sun jdk +-- b = TS[0]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8]-SEL[9]-FS[10] +-- c = TS[1]-RS[7]-JOIN[8] +-- a = TS[2]-MAPJOIN[11] +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value +PREHOOK: type: QUERY +POSTHOOK: query: -- A SMB join is being followed by a regular join on a non-bucketed table on a different key + +-- Three tests below are all the same query with different alias, which changes dispatch order of GenMapRedWalker +-- This is dependent to iteration order of HashMap, so can be meaningless in non-sun jdk +-- b = TS[0]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8]-SEL[9]-FS[10] +-- c = TS[1]-RS[7]-JOIN[8] +-- a = TS[2]-MAPJOIN[11] +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 
(GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col1 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM tbl1 a JOIN 
tbl2 b ON a.key = b.key join src c on c.value = a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +2654 +PREHOOK: query: -- d = TS[0]-RS[7]-JOIN[8]-SEL[9]-FS[10] +-- b = TS[1]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8] +-- a = TS[2]-MAPJOIN[11] +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src d on d.value = a.value +PREHOOK: type: QUERY +POSTHOOK: query: -- d = TS[0]-RS[7]-JOIN[8]-SEL[9]-FS[10] +-- b = TS[1]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8] +-- a = TS[2]-MAPJOIN[11] +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src d on d.value = a.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: 
_col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col1 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src d on d.value = a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src d on d.value = a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +2654 +PREHOOK: query: -- b = TS[0]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8]-SEL[9]-FS[10] +-- a = TS[1]-MAPJOIN[11] +-- h = TS[2]-RS[7]-JOIN[8] +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src h on h.value = a.value +PREHOOK: type: QUERY +POSTHOOK: query: -- b = TS[0]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8]-SEL[9]-FS[10] +-- a = TS[1]-MAPJOIN[11] +-- h = TS[2]-RS[7]-JOIN[8] +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src h on h.value = a.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key 
(type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: h + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col1 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src h on h.value = a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src h on h.value = a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +2654 +PREHOOK: query: -- A SMB join is being followed by a regular join on a non-bucketed table on the same key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = a.key +PREHOOK: type: QUERY +POSTHOOK: query: -- A SMB join is being followed by a regular join on a non-bucketed table on the same key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = a.key +POSTHOOK: type: 
QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 + 1 + 2 + Statistics: Num rows: 1599 Data size: 6397 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1599 Data size: 6397 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = a.key +PREHOOK: type: QUERY +PREHOOK: 
Input: default@src +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +2654 +PREHOOK: query: -- A SMB join is being followed by a regular join on a bucketed table on the same key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl3 c on c.key = a.key +PREHOOK: type: QUERY +POSTHOOK: query: -- A SMB join is being followed by a regular join on a bucketed table on the same key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl3 c on c.key = a.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 + 1 + 2 + Statistics: Num rows: 1599 Data size: 6397 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1599 Data size: 6397 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl3 c on c.key = a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +PREHOOK: Input: default@tbl3 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl3 c on c.key = a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +POSTHOOK: Input: default@tbl3 +#### A masked pattern was here #### +2654 +PREHOOK: query: -- A SMB join is being followed by a regular join on a bucketed table on a different key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c on c.value = a.value +PREHOOK: type: QUERY +POSTHOOK: query: -- A SMB join is being followed by a regular join on a bucketed table on a different key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c on c.value = a.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner 
Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col1 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c on c.value = a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +PREHOOK: Input: default@tbl4 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c on c.value = a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +POSTHOOK: Input: default@tbl4 +#### A masked pattern was here #### +2654 +PREHOOK: query: -- A SMB join is being followed by a regular join on a non-bucketed table on a different key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value +PREHOOK: type: QUERY +POSTHOOK: query: -- A SMB join is being followed by a regular join on a non-bucketed table on a different key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort 
order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col1 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: 
default@tbl2 +#### A masked pattern was here #### +2654 +PREHOOK: query: -- A SMB join is being followed by a regular join on a non-bucketed table on the same key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = a.key +PREHOOK: type: QUERY +POSTHOOK: query: -- A SMB join is being followed by a regular join on a non-bucketed table on the same key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = a.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 + 1 + 2 + Statistics: Num rows: 1599 Data size: 6397 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1599 Data size: 6397 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: 
false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +2654 +PREHOOK: query: -- A SMB join is being followed by a regular join on a bucketed table on the same key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl3 c on c.key = a.key +PREHOOK: type: QUERY +POSTHOOK: query: -- A SMB join is being followed by a regular join on a bucketed table on the same key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl3 c on c.key = a.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 + 1 + 2 + Statistics: Num rows: 1599 Data size: 6397 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1599 Data size: 6397 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + 
outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl3 c on c.key = a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +PREHOOK: Input: default@tbl3 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl3 c on c.key = a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +POSTHOOK: Input: default@tbl3 +#### A masked pattern was here #### +2654 +PREHOOK: query: -- A SMB join is being followed by a regular join on a bucketed table on a different key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c on c.value = a.value +PREHOOK: type: QUERY +POSTHOOK: query: -- A SMB join is being followed by a regular join on a bucketed table on a different key +explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c on c.value = a.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column 
stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col1 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c on c.value = a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +PREHOOK: Input: default@tbl4 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c on c.value = a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +POSTHOOK: Input: default@tbl4 +#### A masked pattern was here #### +2654 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out new file mode 100644 index 0000000..16ef3ae --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out @@ -0,0 +1,1224 @@ +PREHOOK: query: -- small 2 part, 4 bucket & big 2 part, 2 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_small +POSTHOOK: query: -- small 2 part, 4 bucket & big 2 part, 2 bucket +CREATE TABLE bucket_small 
(key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_small +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket4outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket4outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small +POSTHOOK: Output: default@bucket_small@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: 
default@bucket_small@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket4outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket4outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-09 +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_big +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY 
+POSTHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + 
numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + /bucket_small/ds=2008-04-09 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big 
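
The plan above executes the join as a reduce-side shuffle join on Spark rather than a converted sort-merge map join: Map 1 scans bucket_big (alias b) and Map 4 scans bucket_small (alias a), each filters key is not null and shuffles on key (GROUP PARTITION-LEVEL SORT) into Reducer 2, which performs the inner join; Reducer 3 then merges the partial counts. A sketch of how the shown query maps onto those vertices (the comments are an interpretation of the plan, not part of the golden output):

    SELECT count(*)
    FROM bucket_small a
    JOIN bucket_big b ON a.key = b.key;
    -- Map 1 / Map 4: TableScan, Filter (key is not null), Reduce Output keyed on key
    -- Reducer 2:     Inner Join 0 to 1, then Group By count() in hash mode
    -- Reducer 3:     Group By count(VALUE._col0) in mergepartial mode, then file sink
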
+PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +PREHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +POSTHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +76 +PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { 
string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +PREHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +POSTHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +76 +PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . 
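
Both orderings of the join return the same count, 76, and the reversed query produces the same plan shape with only the aliases and statistics swapped (Map 1 now scans bucket_small at 4 rows / 452 bytes, Map 4 scans bucket_big at 54 rows / 5500 bytes), which is exactly what the "-- Since size is being used to find the big table" comment asserts. As a sketch:

    SELECT count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;  -- 76
    SELECT count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;  -- 76
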
+ TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types 
string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +PREHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +POSTHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +76 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out new file mode 100644 index 0000000..9fd3e5a --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out @@ -0,0 +1,1226 @@ +PREHOOK: query: -- small 2 part, 2 bucket & big 2 part, 4 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_small +POSTHOOK: query: -- small 2 part, 2 bucket & big 2 part, 4 bucket +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_small +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small +POSTHOOK: Output: default@bucket_small@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small@ds=2008-04-09 +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: 
Output: default@bucket_big +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_big +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath 
'../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +PREHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + 
columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + /bucket_small/ds=2008-04-09 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output 
Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +PREHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +POSTHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +76 +PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . 
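
The second golden file, auto_sortmerge_join_8.q.out, flips the bucketing of the same scenario: per the CREATE TABLE statements recorded above, bucket_small now has 2 buckets (two files per partition, totalSize 114) and bucket_big has 4 buckets (four files per partition, totalSize 5812), so its plans carry correspondingly larger statistics on the big side (Num rows: 116, Data size: 11624) while the query result is again 76. A sketch of that DDL, taken from the statements in this hunk:

    CREATE TABLE bucket_small (key string, value string)
      PARTITIONED BY (ds string)
      CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
      STORED AS TEXTFILE;

    CREATE TABLE bucket_big (key string, value string)
      PARTITIONED BY (ds string)
      CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS
      STORED AS TEXTFILE;
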
+ TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types 
string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +PREHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +POSTHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +76 +PREHOOK: query: -- The mapjoin should fail resulting in the sort-merge join +explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The mapjoin should fail resulting in the sort-merge join +explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small +#### A 
masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string 
+ serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels 
true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +PREHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +POSTHOOK: Input: default@bucket_small@ds=2008-04-09 +#### A masked pattern was here #### +76 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out new file mode 100644 index 0000000..a7f994f --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out @@ -0,0 +1,3734 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl1 +PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl2 +POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl2 +PREHOOK: query: insert overwrite table tbl1 +select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tbl1 +select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl1 +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tbl2 +select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tbl2 +select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl2 +POSTHOOK: 
Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join +explain +select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +PREHOOK: type: QUERY +POSTHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join +explain +select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a 
join tbl2 b on a.key = b.key +) subq1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +22 +PREHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join +explain +select key, count(*) from +( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +group by key +PREHOOK: type: QUERY +POSTHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join +explain +select key, count(*) from +( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 
5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from +( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from +( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +0 9 +2 1 +4 1 +5 9 +8 1 +9 1 +PREHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join +explain +select count(*) from +( + select key, count(*) from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +PREHOOK: type: QUERY +POSTHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join +explain +select count(*) from +( + select key, count(*) from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 
(type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +( + select key, count(*) from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +( + select key, count(*) from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +6 +PREHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. +-- Each sub-query should be converted to a sort-merge join. +explain +select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. +-- Each sub-query should be converted to a sort-merge join. 
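(Context, reconstructed rather than taken from this diff: each "should be converted to a sort-merge join" assertion in these comments presumes the bucketing/sorting preamble of the corresponding auto_sortmerge_join_9.q test file, which this patch does not show. A minimal sketch of such a preamble, assuming the standard Hive q-file flags of this era; the exact flag list is an assumption, not part of the patch:

set hive.enforce.bucketing = true;                   -- honor CLUSTERED BY when inserting
set hive.enforce.sorting = true;                     -- honor SORTED BY when inserting
set hive.auto.convert.sortmerge.join = true;         -- allow automatic SMB join conversion
set hive.optimize.bucketmapjoin = true;              -- prerequisite bucket map-join optimization
set hive.optimize.bucketmapjoin.sortedmerge = true;  -- stream-merge sorted buckets instead of hashing

With these in effect, two tables bucketed and sorted on the same key into compatible bucket counts, such as tbl1 and tbl2 created at the top of this file, can be joined bucket-by-bucket with a streaming merge.)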
+explain +select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1), Reducer 8 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 7 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Map 9 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 8 <- Reducer 7 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 9 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, 
b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +0 9 9 +2 1 1 +4 1 1 +5 9 9 +8 1 1 +9 1 1 +PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join. +explain +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join. +explain +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + 
Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join, although there is more than one level of sub-query +explain +select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The subquery itself is being joined. 
Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join, although there is more than one level of sub-query +explain +select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked 
pattern was here #### +POSTHOOK: query: select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: -- Both the tables are nested sub-queries i.e. more than 1 level of sub-query. +-- The join should be converted to a sort-merge join +explain +select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Both the tables are nested sub-queries i.e. more than 1 level of sub-query. +-- The join should be converted to a sort-merge join +explain +select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +#### A masked pattern was here #### +20 +PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key +-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one +-- item, but that is not part of the join key. +explain +select count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key +-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one +-- item, but that is not part of the join key. 
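(For contrast, a hypothetical variant that is not part of this test: if the sub-queries rewrote the join key itself rather than a non-key column, the bucketing and sort metadata of tbl1/tbl2 would no longer describe the join input, and the sort-merge conversion would not apply:

-- hypothetical counter-example, for illustration only: transforming the join key blocks SMB conversion
select count(*) from
  (select a.key + 1 as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
  join
  (select a.key + 1 as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
  on subq1.key = subq2.key;

The case below keeps key untouched and only wraps value in concat, which is why the conversion remains legal.)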
+explain +select count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 + on subq1.key 
= subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join nor bucketized mapside +-- join should be performed +explain +select count(*) from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join nor bucketized mapside +-- join should be performed +explain +select count(*) from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) +
Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +22 +PREHOOK: query: -- The left table is a sub-query and the right table is not. +-- It should be converted to a sort-merge join. +explain +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The left table is a sub-query and the right table is not. +-- It should be converted to a sort-merge join. 
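Whether the planner attempts this rewrite at all is controlled by configuration. The actual settings are outside this hunk; the following are the standard Hive switches such a test would plausibly set up front (listed as an assumption, not read from this diff):

  -- assumed session settings, not part of this hunk
  set hive.optimize.bucketmapjoin=true;
  set hive.optimize.bucketmapjoin.sortedmerge=true;
  set hive.auto.convert.sortmerge.join=true;
  set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;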
+explain +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern 
was here #### +20 +PREHOOK: query: -- The right table is a sub-query and the left table is not. +-- It should be converted to a sort-merge join. +explain +select count(*) from tbl1 a + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq1 + on a.key = subq1.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The right table is a sub-query and the left table is not. +-- It should be converted to a sort-merge join. +explain +select count(*) from tbl1 a + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq1 + on a.key = subq1.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tbl1 a + join + (select a.key as key, a.value 
as value from tbl2 a where key < 6) subq1 + on a.key = subq1.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tbl1 a + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq1 + on a.key = subq1.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. +-- It should be converted to a sort-merge join +explain +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on (subq1.key = subq2.key) + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +PREHOOK: type: QUERY +POSTHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. +-- It should be converted to a sort-merge join +explain +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on (subq1.key = subq2.key) + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column
stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 + 1 + 2 + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +56 +PREHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that. +-- The join should be converted to a sort-merge join +explain +select count(*) from ( + select subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a +PREHOOK: type: QUERY +POSTHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that. 
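The result values in this file are consistent with the usual inner-join cardinality identity count(*) = sum over k of c1(k) * c2(k), with a third factor c3(k) for the three-way join. The per-key counts printed further down in this file (0 -> 9, 2 -> 1, 4 -> 1, 5 -> 9, 8 -> 1, 9 -> 1) imply each table holds keys 0 and 5 three times and keys 2, 4, 8, 9 once. So the two-way joins restricted to key < 6 return 3*3 + 1 + 1 + 3*3 = 20, the unfiltered key+1 variant also picks up keys 8 and 9 for 22, and the three-way join above returns 3^3 + 1 + 1 + 3^3 = 56.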
+-- The join should be converted to a sort-merge join +explain +select count(*) from ( + select subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from ( + select subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a +PREHOOK: type: QUERY +PREHOOK: Input: 
default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from ( + select subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join +explain +select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +PREHOOK: type: QUERY +POSTHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join +explain +select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +22 +PREHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join +explain +select key, count(*) from +( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +group by key +PREHOOK: type: QUERY +POSTHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join +explain +select key, count(*) from +( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: 
count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from +( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from +( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +0 9 +2 1 +4 1 +5 9 +8 1 +9 1 +PREHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join +explain +select count(*) from +( + select key, count(*) from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +PREHOOK: type: QUERY +POSTHOOK: query: -- The join is being performed as part of more than one sub-query. 
It should be converted to a sort-merge join +explain +select count(*) from +( + select key, count(*) from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +( + select key, count(*) from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +( + select key, count(*) from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +6 +PREHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. +-- Each sub-query should be converted to a sort-merge join. +explain +select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. +-- Each sub-query should be converted to a sort-merge join. 
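Reading these plans against the comments: when the sort-merge conversion actually fires, the explain output shows the join as a sort-merge bucket map join operator inside a single map-side operator tree, with no shuffle for the join itself. In the Spark plans recorded in this hunk the joins still appear as reduce-side Join Operators fed by GROUP PARTITION-LEVEL SORT edges, so the "should be converted" comments record the test's intent (inherited from the MapReduce version of this test) rather than the operator shapes these Spark plans show.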
+explain +select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1), Reducer 8 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 7 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Map 9 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 8 <- Reducer 7 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 9 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, 
b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +0 9 9 +2 1 1 +4 1 1 +5 9 9 +8 1 1 +9 1 1 +PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join. +explain +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join. +explain +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + 
Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join, although there is more than one level of sub-query +explain +select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The subquery itself is being joined. 
Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join, although there is more than one level of sub-query +explain +select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked 
pattern was here #### +POSTHOOK: query: select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: -- Both the tables are nested sub-queries i.e more then 1 level of sub-query. +-- The join should be converted to a sort-merge join +explain +select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Both the tables are nested sub-queries i.e more then 1 level of sub-query. +-- The join should be converted to a sort-merge join +explain +select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +#### A masked pattern was here #### +20 +PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key +-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one +-- item, but that is not part of the join key. +explain +select count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key +-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one +-- item, but that is not part of the join key. 
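The comment above names the property the conversion actually depends on: the join key must leave the sub-query untouched. Rewriting a non-key column, here concat(a.value, a.value), does not invalidate the bucketing and sort order on key. The explain that follows shows the eligible pattern; for contrast, a hypothetical counter-example (not part of this golden file) that recomputes the key and therefore should not convert:

-- Hypothetical counter-example against the same tbl1/tbl2: the join key
-- itself is recomputed, so the optimizer can no longer trust the tables'
-- bucketing and sorting on key, and the sort-merge conversion is off.
explain
select count(*) from
  (select a.key + 1 as key from tbl1 a where key < 8) subq1
  join tbl2 b on subq1.key = b.key;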
+explain +select count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 + on subq1.key 
= subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: -- The left table is a sub-query and the right table is not. +-- It should be converted to a sort-merge join. +explain +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The left table is a sub-query and the right table is not. +-- It should be converted to a sort-merge join. +explain +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: -- The right table is a sub-query and the left table is not. +-- It should be converted to a sort-merge join. +explain +select count(*) from tbl1 a + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq1 + on a.key = subq1.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The right table is a sub-query and the left table is not. +-- It should be converted to a sort-merge join. +explain +select count(*) from tbl1 a + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq1 + on a.key = subq1.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from tbl1 a + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq1 + on a.key = subq1.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from tbl1 a + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq1 + on a.key = subq1.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. +-- It should be converted to to a sort-merge join +explain +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on (subq1.key = subq2.key) + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +PREHOOK: type: QUERY +POSTHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. 
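Before the three-input case below, note the pair of symmetric tests just above: eligibility does not depend on which side of the join is the sub-query. The two orderings, copied verbatim from those tests, produce the same plan shape:

-- Left input is the sub-query.
explain
select count(*) from
  (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
  join tbl2 a on subq1.key = a.key;

-- Right input is the sub-query; the recorded plan is the same shape.
explain
select count(*) from tbl1 a
  join
  (select a.key as key, a.value as value from tbl2 a where key < 6) subq1
  on a.key = subq1.key;

One caveat readable straight out of these plans: although the .q comments expect a sort-merge join, the Spark plans recorded here still show a shuffle join, with both map vertices feeding Reducer 2 over GROUP PARTITION-LEVEL SORT edges. On the MapReduce side the same tests typically print a Sorted Merge Bucket Map Join Operator instead.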
+-- It should be converted to to a sort-merge join +explain +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on (subq1.key = subq2.key) + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 + 1 + 2 + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +56 +PREHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that. +-- The join should be converted to a sort-merge join +explain +select count(*) from ( + select subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a +PREHOOK: type: QUERY +POSTHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that. 
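On the three-input test above: both ON clauses reference subq1.key, so the planner folds all three inputs into a single Join Operator (the condition map reads Inner Join 0 to 1 and Inner Join 0 to 2, with Map 1, Map 4 and Map 5 all feeding Reducer 2), and the third input raises the result from 20 to 56 through extra per-key multiplicity. Joins chained on different keys cannot be merged this way; a hypothetical variant (not in this golden file) that would force a second join stage:

-- Hypothetical: the second predicate joins on value instead of
-- subq1.key, so the three inputs can no longer share one Join Operator.
explain
select count(*) from
  (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
  join
  (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
    on subq1.key = subq2.key
  join
  (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
    on subq2.value = subq3.value;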
+-- The join should be converted to a sort-merge join +explain +select count(*) from ( + select subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from ( + select subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a +PREHOOK: type: QUERY +PREHOOK: Input: 
default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from ( + select subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 diff --git ql/src/test/results/clientpositive/spark/avro_joins.q.out ql/src/test/results/clientpositive/spark/avro_joins.q.out new file mode 100644 index 0000000..7e47fe0 --- /dev/null +++ ql/src/test/results/clientpositive/spark/avro_joins.q.out @@ -0,0 +1,199 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- verify that new joins bring in correct schemas (including evolved schemas) + +CREATE TABLE doctors4 +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "doctors", + "type": "record", + "fields": [ + { + "name":"number", + "type":"int", + "doc":"Order of playing the role" + }, + { + "name":"first_name", + "type":"string", + "doc":"first name of actor playing role" + }, + { + "name":"last_name", + "type":"string", + "doc":"last name of actor playing role" + }, + { + "name":"extra_field", + "type":"string", + "doc:":"an extra field not in the original file", + "default":"fishfingers and custard" + } + ] +}') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@doctors4 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- verify that new joins bring in correct schemas (including evolved schemas) + +CREATE TABLE doctors4 +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "doctors", + "type": "record", + "fields": [ + { + "name":"number", + "type":"int", + "doc":"Order of playing the role" + }, + { + "name":"first_name", + "type":"string", + "doc":"first name of actor playing role" + }, + { + "name":"last_name", + "type":"string", + "doc":"last name of actor playing role" + }, + { + "name":"extra_field", + "type":"string", + "doc:":"an extra field not in the original file", + "default":"fishfingers and custard" + } + ] +}') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@doctors4 +PREHOOK: query: DESCRIBE doctors4 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@doctors4 +POSTHOOK: query: DESCRIBE doctors4 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@doctors4 +number int from deserializer +first_name string from deserializer +last_name string from deserializer +extra_field string from deserializer +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@doctors4 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@doctors4 +PREHOOK: query: CREATE TABLE episodes +ROW 
FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "episodes", + "type": "record", + "fields": [ + { + "name":"title", + "type":"string", + "doc":"episode title" + }, + { + "name":"air_date", + "type":"string", + "doc":"initial date" + }, + { + "name":"doctor", + "type":"int", + "doc":"main actor playing the Doctor in episode" + } + ] +}') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@episodes +POSTHOOK: query: CREATE TABLE episodes +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "episodes", + "type": "record", + "fields": [ + { + "name":"title", + "type":"string", + "doc":"episode title" + }, + { + "name":"air_date", + "type":"string", + "doc":"initial date" + }, + { + "name":"doctor", + "type":"int", + "doc":"main actor playing the Doctor in episode" + } + ] +}') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@episodes +PREHOOK: query: DESCRIBE episodes +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@episodes +POSTHOOK: query: DESCRIBE episodes +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@episodes +title string from deserializer +air_date string from deserializer +doctor int from deserializer +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@episodes +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@episodes +PREHOOK: query: SELECT e.title, e.air_date, d.first_name, d.last_name, d.extra_field, e.air_date +FROM doctors4 d JOIN episodes e ON (d.number=e.doctor) +PREHOOK: type: QUERY +PREHOOK: Input: default@doctors4 +PREHOOK: Input: default@episodes +#### A masked pattern was here #### +POSTHOOK: query: SELECT e.title, e.air_date, d.first_name, d.last_name, d.extra_field, e.air_date +FROM doctors4 d JOIN episodes e ON (d.number=e.doctor) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@doctors4 +POSTHOOK: Input: default@episodes +#### A masked pattern was here #### +An Unearthly Child 23 November 1963 William Hartnell fishfingers and custard 23 November 1963 +Castrolava 4 January 1982 Peter Davison fishfingers and custard 4 January 1982 +Horror of Fang Rock 3 September 1977 Tom Baker fishfingers and custard 3 September 1977 +Rose 26 March 2005 Christopher Eccleston fishfingers and custard 26 March 2005 +The Doctor's Wife 14 May 2011 Matt Smith fishfingers and custard 14 May 2011 +The Eleventh Hour 3 April 2010 Matt Smith fishfingers and custard 3 April 2010 +The Mysterious Planet 6 September 1986 Colin Baker fishfingers and custard 6 September 1986 +The Power of the Daleks 5 November 1966 Patrick Troughton fishfingers and custard 5 November 1966 diff --git ql/src/test/results/clientpositive/spark/avro_joins_native.q.out ql/src/test/results/clientpositive/spark/avro_joins_native.q.out new file mode 100644 index 
0000000..ca22a72 --- /dev/null +++ ql/src/test/results/clientpositive/spark/avro_joins_native.q.out @@ -0,0 +1,94 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- verify that new joins bring in correct schemas (including evolved schemas) + +CREATE TABLE doctors4 ( + number int COMMENT "Order of playing the role", + first_name string COMMENT "first name of actor playing role", + last_name string COMMENT "last name of actor playing role") +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@doctors4 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- verify that new joins bring in correct schemas (including evolved schemas) + +CREATE TABLE doctors4 ( + number int COMMENT "Order of playing the role", + first_name string COMMENT "first name of actor playing role", + last_name string COMMENT "last name of actor playing role") +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@doctors4 +PREHOOK: query: DESCRIBE doctors4 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@doctors4 +POSTHOOK: query: DESCRIBE doctors4 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@doctors4 +number int from deserializer +first_name string from deserializer +last_name string from deserializer +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@doctors4 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@doctors4 +PREHOOK: query: CREATE TABLE episodes ( + title string COMMENT "episode title", + air_date string COMMENT "initial date", + doctor int COMMENT "main actor playing the Doctor in episode") +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@episodes +POSTHOOK: query: CREATE TABLE episodes ( + title string COMMENT "episode title", + air_date string COMMENT "initial date", + doctor int COMMENT "main actor playing the Doctor in episode") +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@episodes +PREHOOK: query: DESCRIBE episodes +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@episodes +POSTHOOK: query: DESCRIBE episodes +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@episodes +title string from deserializer +air_date string from deserializer +doctor int from deserializer +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@episodes +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@episodes +PREHOOK: query: SELECT e.title, e.air_date, d.first_name, d.last_name, e.air_date +FROM doctors4 d JOIN episodes e ON (d.number=e.doctor) +PREHOOK: type: QUERY +PREHOOK: Input: default@doctors4 +PREHOOK: Input: default@episodes +#### A masked pattern was here #### +POSTHOOK: query: SELECT e.title, e.air_date, d.first_name, d.last_name, e.air_date +FROM doctors4 d JOIN episodes e ON (d.number=e.doctor) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@doctors4 +POSTHOOK: Input: default@episodes +#### A masked pattern was here #### +An Unearthly Child 23 November 1963 William Hartnell 23 November 1963 
+Castrolava 4 January 1982 Peter Davison 4 January 1982 +Horror of Fang Rock 3 September 1977 Tom Baker 3 September 1977 +Rose 26 March 2005 Christopher Eccleston 26 March 2005 +The Doctor's Wife 14 May 2011 Matt Smith 14 May 2011 +The Eleventh Hour 3 April 2010 Matt Smith 3 April 2010 +The Mysterious Planet 6 September 1986 Colin Baker 6 September 1986 +The Power of the Daleks 5 November 1966 Patrick Troughton 5 November 1966 diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out new file mode 100644 index 0000000..4ec619e --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out @@ -0,0 +1,331 @@ +PREHOOK: query: drop table table1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table table1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table table2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table table2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table table1(key string, value string) clustered by (key, value) +sorted by (key, value) into 1 BUCKETS stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: create table table1(key string, value string) clustered by (key, value) +sorted by (key, value) into 1 BUCKETS stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: create table table2(key string, value string) clustered by (value, key) +sorted by (value, key) into 1 BUCKETS stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: create table table2(key string, value string) clustered by (value, key) +sorted by (value, key) into 1 BUCKETS stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: load data local inpath '../../data/files/SortCol1Col2.txt' overwrite into table table1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@table1 +POSTHOOK: query: load data local inpath '../../data/files/SortCol1Col2.txt' overwrite into table table1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@table1 +PREHOOK: query: load data local inpath '../../data/files/SortCol2Col1.txt' overwrite into table table2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@table2 +POSTHOOK: query: load data local inpath '../../data/files/SortCol2Col1.txt' overwrite into table table2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@table2 +PREHOOK: query: -- The tables are bucketed in same columns in different order, +-- but sorted in different column orders +-- Neither bucketed map-join, nor sort-merge join should be performed + +explain extended +select /*+ mapjoin(b) */ count(*) from table1 a join table2 b on a.key=b.key and a.value=b.value +PREHOOK: type: QUERY +POSTHOOK: query: -- The tables are bucketed in same columns in different order, +-- but sorted in different column orders +-- Neither bucketed map-join, nor sort-merge join should be performed + +explain extended +select /*+ mapjoin(b) */ count(*) from table1 a join table2 b on a.key=b.key and a.value=b.value +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + table1 + a + TOK_TABREF + TOK_TABNAME + table2 + b + and + = + . 
+ TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + value + . + TOK_TABLE_OR_COL + b + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 21 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: table2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 1 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.table2 + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct table2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 1 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.table2 + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct table2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.table2 + name: default.table2 + Truncated Path -> Alias: + /table2 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> 
Partition: +#### A masked pattern was here #### + Partition + base file name: table1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 1 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.table1 + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct table1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 20 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 1 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.table1 + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct table1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 20 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.table1 + name: default.table1 + Truncated Path -> Alias: + /table1 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ mapjoin(b) */ count(*) from table1 a join table2 b on a.key=b.key and a.value=b.value +PREHOOK: type: QUERY 
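The bucket_map_join_1 plan above is the negative case: table1 is bucketed and sorted on (key, value) while table2 uses (value, key), visible in the partition properties as bucket_field_name key on one side and value on the other. Even with the mapjoin(b) hint, the join stays a plain shuffle join into Reducer 2. A hypothetical DDL pair (not in this diff) that would make the same query SMB-eligible simply aligns both bucket and sort specs:

-- Hypothetical tables: identical clustered-by and sorted-by column
-- order on both sides is what the conversion requires.
create table table1_smb(key string, value string)
clustered by (key, value) sorted by (key, value) into 1 buckets
stored as textfile;
create table table2_smb(key string, value string)
clustered by (key, value) sorted by (key, value) into 1 buckets
stored as textfile;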
+PREHOOK: Input: default@table1 +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ mapjoin(b) */ count(*) from table1 a join table2 b on a.key=b.key and a.value=b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +4 diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out new file mode 100644 index 0000000..1c288c2 --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out @@ -0,0 +1,331 @@ +PREHOOK: query: drop table table1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table table1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table table2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table table2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table table1(key string, value string) clustered by (key, value) +sorted by (key desc, value desc) into 1 BUCKETS stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: create table table1(key string, value string) clustered by (key, value) +sorted by (key desc, value desc) into 1 BUCKETS stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: create table table2(key string, value string) clustered by (value, key) +sorted by (value desc, key desc) into 1 BUCKETS stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: create table table2(key string, value string) clustered by (value, key) +sorted by (value desc, key desc) into 1 BUCKETS stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: load data local inpath '../../data/files/SortCol1Col2.txt' overwrite into table table1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@table1 +POSTHOOK: query: load data local inpath '../../data/files/SortCol1Col2.txt' overwrite into table table1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@table1 +PREHOOK: query: load data local inpath '../../data/files/SortCol2Col1.txt' overwrite into table table2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@table2 +POSTHOOK: query: load data local inpath '../../data/files/SortCol2Col1.txt' overwrite into table table2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@table2 +PREHOOK: query: -- The tables are bucketed in same columns in different order, +-- but sorted in different column orders +-- Neither bucketed map-join, nor sort-merge join should be performed + +explain extended +select /*+ mapjoin(b) */ count(*) from table1 a join table2 b on a.key=b.key and a.value=b.value +PREHOOK: type: QUERY +POSTHOOK: query: -- The tables are bucketed in same columns in different order, +-- but sorted in different column orders +-- Neither bucketed map-join, nor sort-merge join should be performed + +explain extended +select /*+ mapjoin(b) */ count(*) from table1 a join table2 b on a.key=b.key and a.value=b.value +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + table1 + a + TOK_TABREF + TOK_TABNAME + table2 + b + and + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + value + . + TOK_TABLE_OR_COL + b + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 21 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: table2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 1 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.table2 + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct table2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 1 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.table2 + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct table2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.table2 + name: default.table2 + Truncated Path -> Alias: + /table2 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was 
here #### + Partition + base file name: table1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 1 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.table1 + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct table1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 20 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 1 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.table1 + numFiles 1 + numRows 0 + rawDataSize 0 + serialization.ddl struct table1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 20 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.table1 + name: default.table1 + Truncated Path -> Alias: + /table1 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ mapjoin(b) */ count(*) from table1 a join table2 b on a.key=b.key and a.value=b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 
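bucket_map_join_2 repeats the negative case with descending sorts: table1 is sorted by (key desc, value desc), table2 by (value desc, key desc), and the plan again falls back to a shuffle join. Sort direction matters as much as column order, and both live in the metastore. An illustrative check (describe formatted is a real command; the inline output is paraphrased, where order:0 marks a descending and order:1 an ascending sort column):

-- Compare bucket/sort metadata on the two sides of the join.
describe formatted table1;  -- Sort Columns: [Order(col:key, order:0), Order(col:value, order:0)]
describe formatted table2;  -- Sort Columns: [Order(col:value, order:0), Order(col:key, order:0)]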
+PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ mapjoin(b) */ count(*) from table1 a join table2 b on a.key=b.key and a.value=b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +4 diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out new file mode 100644 index 0000000..a2a7fe1 --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out @@ -0,0 +1,1253 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab_part +POSTHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab_part +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part 
partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Output: default@tab_part@ds=2008-04-08 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Output: default@tab_part@ds=2008-04-08 +POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab +POSTHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab +PREHOOK: query: insert overwrite table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: 
default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: Output: default@tab@ds=2008-04-08 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: Output: default@tab@ds=2008-04-08 +POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: explain +select a.key, a.value, b.value +from tab a join tab_part b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.value, b.value +from tab a join tab_part b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- one side is really bucketed. srcbucket_mapjoin is not really a bucketed table. +-- In this case the sub-query is chosen as the big table. 
+explain +select a.k1, a.v1, b.value +from (select sum(substr(srcbucket_mapjoin.value,5)) as v1, key as k1 from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a +join tab b on a.k1 = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- one side is really bucketed. srcbucket_mapjoin is not really a bucketed table. +-- In this case the sub-query is chosen as the big table. +explain +select a.k1, a.v1, b.value +from (select sum(substr(srcbucket_mapjoin.value,5)) as v1, key as k1 from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a +join tab b on a.k1 = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: srcbucket_mapjoin + Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(substr(value, 5)) + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 15 Data size: 1601 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 1601 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1601 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: double), _col0 (type: 
int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.k1, a.v1, b.value +from (select sum(substr(tab.value,5)) as v1, key as k1 from tab_part join tab on tab_part.key = tab.key GROUP BY tab.key) a +join tab b on a.k1 = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.k1, a.v1, b.value +from (select sum(substr(tab.value,5)) as v1, key as k1 from tab_part join tab on tab_part.key = tab.key GROUP BY tab.key) a +join tab b on a.k1 = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: tab + Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 438 Data size: 1755 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 438 Data size: 1755 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 438 Data size: 1755 Basic stats: COMPLETE Column stats: NONE + table: + input 
format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col6, _col7 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col6 (type: int), _col7 (type: string) + outputColumnNames: _col6, _col7 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(substr(_col7, 5)) + keys: _col6 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 399 Data size: 1596 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: double), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 399 Data size: 1596 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 399 Data size: 1596 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.k1, a.v1, b.value +from (select sum(substr(x.value,5)) as v1, x.key as k1 from tab x join tab y on x.key = y.key GROUP BY x.key) a +join tab_part b on a.k1 = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.k1, a.v1, b.value +from (select sum(substr(x.value,5)) as v1, x.key as k1 from tab x join tab y on x.key = y.key GROUP BY x.key) a +join tab_part b on a.k1 = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1456 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 702 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 351 Data size: 1404 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 351 Data size: 1404 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 212 Data size: 849 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 212 Data size: 849 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 212 Data size: 849 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 386 Data size: 1544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 386 Data size: 1544 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(substr(_col1, 5)) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 386 Data size: 1544 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 386 Data size: 1544 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 193 Data size: 772 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: double), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 193 Data size: 772 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 193 Data size: 772 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- multi-way join +explain +select a.key, a.value, b.value +from tab_part a join tab b on a.key = b.key join tab c on a.key = c.key +PREHOOK: type: QUERY +POSTHOOK: query: -- multi-way join +explain 
+select a.key, a.value, b.value +from tab_part a join tab b on a.key = b.key join tab c on a.key = c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 702 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 351 Data size: 1404 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 351 Data size: 1404 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + 2 + outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 772 Data size: 3088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 772 Data size: 3088 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 772 Data size: 3088 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.key, a.value, c.value +from (select x.key, x.value from tab_part x join tab y on x.key = y.key) a join tab c on a.key = c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.value, c.value +from (select x.key, x.value from tab_part x join tab y on x.key = y.key) a join tab c on a.key = c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + 
Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 702 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 351 Data size: 1404 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 351 Data size: 1404 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 424 Data size: 1698 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 424 Data size: 1698 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 424 Data size: 1698 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 386 Data size: 1544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 386 Data size: 1544 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 386 Data size: 1544 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- in this case sub-query is the small table +explain +select a.key, a.value, b.value +from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY 
srcbucket_mapjoin.key) a +join tab_part b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- in this case sub-query is the small table +explain +select a.key, a.value, b.value +from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a +join tab_part b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: srcbucket_mapjoin + Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(substr(value, 5)) + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition 
columns: _col0 (type: int) + Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.key, a.value, b.value +from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a +join tab_part b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.value, b.value +from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a +join tab_part b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: srcbucket_mapjoin + Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + value expressions: substr(value, 5) (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 728 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- join on non-bucketed column results in broadcast join. +explain +select a.key, a.value, b.value +from tab a join tab_part b on a.value = b.value +PREHOOK: type: QUERY +POSTHOOK: query: -- join on non-bucketed column results in broadcast join. +explain +select a.key, a.value, b.value +from tab a join tab_part b on a.value = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: CREATE TABLE tab1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab1 +POSTHOOK: query: CREATE TABLE tab1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab1 +PREHOOK: query: insert overwrite table tab1 +select key,value from srcbucket_mapjoin +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin 
+PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: Output: default@tab1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tab1 +select key,value from srcbucket_mapjoin +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: Output: default@tab1 +POSTHOOK: Lineage: tab1.key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab1.value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: explain +select a.key, a.value, b.value +from tab1 a join tab_part b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.value, b.value +from tab1 a join tab_part b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value +PREHOOK: type: QUERY +POSTHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = 
b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col12 + Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col12 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out new file 
mode 100644 index 0000000..60e2a4c --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out @@ -0,0 +1,740 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab_part +POSTHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab_part +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local 
inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Output: default@tab_part@ds=2008-04-08 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Output: default@tab_part@ds=2008-04-08 +POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab +POSTHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab +PREHOOK: query: insert overwrite table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: Output: default@tab@ds=2008-04-08 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: Output: 
default@tab@ds=2008-04-08 +POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value +PREHOOK: type: QUERY +POSTHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 14 Data size: 1479 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col12 + Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col12 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 878 Data size: 3517 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: CREATE TABLE tab1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab1 +POSTHOOK: query: CREATE TABLE tab1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab1 +PREHOOK: query: insert overwrite table tab1 +select key,value from srcbucket_mapjoin +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: Output: default@tab1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tab1 +select key,value from srcbucket_mapjoin +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: Output: default@tab1 +POSTHOOK: Lineage: tab1.key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab1.value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: explain +select a.key, a.value, b.value +from tab1 a join src b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.value, b.value +from tab1 a join src b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 
to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col1} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.key, b.key from (select key from tab_part where key > 1) a join (select key from tab_part where key > 2) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, b.key from (select key from tab_part where key > 1) a join (select key from tab_part where key > 2) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 1) and key is not null) (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 2) and key is not null) (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1 + Statistics: Num rows: 266 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 266 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 266 Data size: 1064 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.key, b.key from (select key from tab_part where key > 1) a left outer join (select key from tab_part where key > 2) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, b.key from (select key from tab_part where key > 1) a left outer join (select key from tab_part where key > 2) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 1) (type: boolean) + Statistics: Num rows: 484 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 484 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 484 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 484 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 484 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 484 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1 + Statistics: Num rows: 532 Data size: 2129 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 532 Data size: 2129 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 532 Data size: 2129 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.key, b.key from (select key from tab_part where key > 1) a right outer join (select key from tab_part where key > 2) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, b.key from (select key from tab_part where key > 1) a right outer join (select key from tab_part where key > 2) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: 
+ Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 1) (type: boolean) + Statistics: Num rows: 484 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 484 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 484 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 484 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 484 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 484 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1 + Statistics: Num rows: 532 Data size: 2129 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 532 Data size: 2129 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 532 Data size: 2129 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select a.key, b.key from (select distinct key from tab) a join tab b on b.key = a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select a.key, b.key from (select distinct key from tab) a join tab b on b.key = a.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 702 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 351 Data size: 1404 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 351 Data size: 1404 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: tab + Statistics: Num rows: 702 Data size: 2808 
Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 351 Data size: 1404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 351 Data size: 1404 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 351 Data size: 1404 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 351 Data size: 1404 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1 + Statistics: Num rows: 386 Data size: 1544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 386 Data size: 1544 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 386 Data size: 1544 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 175 Data size: 700 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 175 Data size: 700 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 175 Data size: 700 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select a.value, b.value from (select distinct value from tab) a join tab b on b.key = a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain select a.value, b.value from (select distinct value from tab) a join tab b on b.key = a.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: tab + Statistics: Num rows: 28 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: 
UDFToDouble(value) is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 14 Data size: 1404 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 1404 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 1404 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col1} + outputColumnNames: _col0, _col2 + Statistics: Num rows: 15 Data size: 1601 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 1601 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1601 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 7 Data size: 702 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out new file mode 100644 index 0000000..bea92f9 --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out @@ -0,0 +1,1184 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: CREATE TABLE 
srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: -- empty partitions (HIVE-3205) +explain extended +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key where b.ds="2008-04-08" +PREHOOK: type: QUERY +POSTHOOK: query: -- empty partitions (HIVE-3205) +explain extended +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key where b.ds="2008-04-08" +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + = + . + TOK_TABLE_OR_COL + b + ds + "2008-04-08" + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map 3 + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key where b.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +#### A masked pattern was here #### +POSTHOOK: 
query: select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key where b.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +#### A masked pattern was here #### +PREHOOK: query: explain extended +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key where b.ds="2008-04-08" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key where b.ds="2008-04-08" +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + = + . + TOK_TABLE_OR_COL + b + ds + "2008-04-08" + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map 3 + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key where b.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key where b.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: 
default@srcbucket_mapjoin_part_2 +#### A masked pattern was here #### +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: 
default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +PREHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + = + . 
+ TOK_TABLE_OR_COL + b + ds + "2008-04-08" + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part + Truncated Path -> Alias: + /srcbucket_mapjoin_part/ds=2008-04-08 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: srcbucket_mapjoin + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin + Truncated Path -> Alias: + /srcbucket_mapjoin [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +464 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +PREHOOK: 
type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +464 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +0 0 0 +PREHOOK: query: 
explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + = + . + TOK_TABLE_OR_COL + b + ds + "2008-04-08" + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked 
pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part + Truncated Path -> Alias: + /srcbucket_mapjoin_part/ds=2008-04-08 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: srcbucket_mapjoin + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin + Truncated Path -> Alias: + /srcbucket_mapjoin [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name 
default.bucketmapjoin_tmp_result + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 8983 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 8983 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +464 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +[Error 
30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +464 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: Lineage: 
bucketmapjoin_hash_result_2.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +0 0 0 diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out new file mode 100644 index 0000000..8be3edd --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out @@ -0,0 +1,525 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 3 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 3 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: LOAD DATA 
LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### 
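
The LOAD/ALTER sequences above follow a fixed idiom: each table is created CLUSTERED BY (key) INTO N BUCKETS, its first partition is loaded, and the table is then re-clustered to a different bucket count before the next partition is loaded. Hive records bucketing metadata per partition at partition-creation time, so the two partitions of one table end up carrying different bucket_count values, which is exactly the table-versus-partition mismatch these tests probe (the EXPLAIN plan below shows part=1 with bucket_count 2 and part=2 with bucket_count 3 for srcbucket_mapjoin_part_1). A condensed HiveQL sketch of the idiom; the statements mirror the output above, while the comments are an editorial reading of the plans, not part of the diff:

CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING)
PARTITIONED BY (part STRING)
CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;

-- part='1' is created here, so its storage descriptor records bucket_count 2
LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt'
INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1');

-- Re-clustering rewrites only the table-level metadata; partitions that
-- already exist keep the bucket count they were created with
ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 3 BUCKETS;

-- part='2' is created after the ALTER, so it records bucket_count 3
LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt'
INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2');
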
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=2 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: -- The table bucketing metadata matches but the partition metadata does not, bucket map join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL +PREHOOK: type: QUERY +POSTHOOK: query: -- The table bucketing metadata matches but the partition metadata does not, bucket map join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + a + part + TOK_FUNCTION + TOK_ISNOTNULL + . 
+ TOK_TABLE_OR_COL + b + part + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1737 Data size: 6950 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 869 Data size: 3477 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 869 Data size: 3477 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 3 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 +#### A masked pattern was here #### + Partition + base file name: part=2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 2 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/part=1 [b] + /srcbucket_mapjoin_part_2/part=2 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1737 Data size: 6950 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 869 Data size: 3477 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 869 Data size: 3477 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 +#### A masked pattern was here #### + Partition + base file name: part=2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 2 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked 
pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 3 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 955 Data size: 3824 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 955 Data size: 3824 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=2 
+PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=2 +#### A masked pattern was here #### +2116 diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out new file mode 100644 index 0000000..9e45843 --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out @@ -0,0 +1,938 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 4 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 4 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') +POSTHOOK: type: LOAD +#### A masked pattern was here 
#### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket22.txt' INTO TABLE 
srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=2 +PREHOOK: query: -- The table and partition bucketing metadata doesn't match but the bucket numbers of all partitions is +-- a power of 2 and the bucketing columns match so bucket map join should be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL +PREHOOK: type: QUERY +POSTHOOK: query: -- The table and partition bucketing metadata doesn't match but the bucket numbers of all partitions is +-- a power of 2 and the bucketing columns match so bucket map join should be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + a + part + TOK_FUNCTION + TOK_ISNOTNULL + . 
+ TOK_TABLE_OR_COL + b + part + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2140 Data size: 8562 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 4 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 +#### A masked pattern was here #### + Partition + base file name: part=2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 2 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/part=1 [b] + /srcbucket_mapjoin_part_2/part=2 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2140 Data size: 8562 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 +#### A masked pattern was here #### + Partition + base file name: part=2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 2 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked 
pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 4 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: Input: 
default@srcbucket_mapjoin_part_1@part=2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=2 +#### A masked pattern was here #### +2420 +PREHOOK: query: EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = b.part AND a.part IS NOT NULL AND b.part IS NOT NULL +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = b.part AND a.part IS NOT NULL AND b.part IS NOT NULL +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + AND + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + . + TOK_TABLE_OR_COL + b + part + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + a + part + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + b + part + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2140 Data size: 8562 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int), part (type: string) + sort order: ++ + Map-reduce partition columns: key (type: int), part (type: string) + Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 4 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct 
srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 +#### A masked pattern was here #### + Partition + base file name: part=2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 2 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/part=1 [b] + /srcbucket_mapjoin_part_2/part=2 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2140 Data size: 8562 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int), part (type: string) + sort order: ++ + Map-reduce partition columns: key (type: int), part (type: string) + Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 +#### A masked pattern was here #### + Partition + base file name: part=2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 2 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 4 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: 
_col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = b.part AND a.part IS NOT NULL AND b.part IS NOT NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = b.part AND a.part IS NOT NULL AND b.part IS NOT NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=2 +#### A masked pattern was here #### +928 diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out new file mode 100644 index 0000000..0c1ac4b --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out @@ -0,0 +1,689 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 NOT CLUSTERED +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 NOT CLUSTERED +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_3 (key INT, value STRING) PARTITIONED BY (part STRING) +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_3 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_3 (key INT, value STRING) PARTITIONED BY (part STRING) +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_3 +PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_3 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_3 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_3 +POSTHOOK: Output: default@srcbucket_mapjoin_part_3@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_3 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_3@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_3 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_3@part=1 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_3 CLUSTERED BY (key) INTO 2 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_3 +PREHOOK: Output: default@srcbucket_mapjoin_part_3 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_3 CLUSTERED BY (key) INTO 2 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_3 +POSTHOOK: Output: default@srcbucket_mapjoin_part_3 +PREHOOK: query: -- The partition bucketing metadata match but one table is not bucketed, bucket map join should still be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- The partition bucketing metadata match but one table is not bucketed, bucket map join should still be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + and + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + '1' + = + . 
+ TOK_TABLE_OR_COL + b + part + '1' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/part=1 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/part=1 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN 
srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +464 +PREHOOK: query: -- The table bucketing metadata match but one partition is not bucketed, bucket map join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_3 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- The table bucketing metadata match but one partition is not bucketed, bucket map join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_3 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_3 + b + and + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + '1' + = + . + TOK_TABLE_OR_COL + b + part + '1' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_3 + numFiles 2 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_3 { i32 key, string value} + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_3 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_3 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_3 + name: default.srcbucket_mapjoin_part_3 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_3/part=1 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/part=1 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + 
Select Operator + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_3 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_3 +PREHOOK: Input: default@srcbucket_mapjoin_part_3@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_3 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_3 +POSTHOOK: Input: default@srcbucket_mapjoin_part_3@part=1 +#### A masked pattern was here #### +464 diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out new file mode 100644 index 0000000..dc1b8cf --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out @@ -0,0 +1,1280 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (value) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (value) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: -- part=1 partition for srcbucket_mapjoin_part_1 is bucketed by 'value' +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +SELECT * FROM src +PREHOOK: type: 
QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: -- part=1 partition for srcbucket_mapjoin_part_1 is bucketed by 'value' +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 2 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 2 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: -- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') +SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: -- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') +SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2 +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: -- part=1 partition for srcbucket_mapjoin_part_2 is bucketed by 'key' +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: -- part=1 partition for srcbucket_mapjoin_part_2 is bucketed by 'key' +INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: 
Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- part=1 partition for srcbucket_mapjoin_part_1 is bucketed by 'value' +-- and it is also being joined. So, bucketed map-join cannot be performed +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- part=1 partition for srcbucket_mapjoin_part_1 is bucketed by 'value' +-- and it is also being joined. So, bucketed map-join cannot be performed +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows -1 + partition_columns part + partition_columns.types string + rawDataSize -1 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + 
partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/part=1 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2906 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name value + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows -1 + partition_columns part + partition_columns.types string + rawDataSize -1 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 +#### A masked pattern was here #### + Partition + base file name: part=2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 2 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows -1 + partition_columns part + partition_columns.types string + rawDataSize -1 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 1598 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1598 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Input: 
default@srcbucket_mapjoin_part_1@part=2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +2056 +PREHOOK: query: -- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' +-- and it is being joined. So, bucketed map-join can be performed +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key and a.part = '2' +PREHOOK: type: QUERY +POSTHOOK: query: -- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' +-- and it is being joined. So, bucketed map-join can be performed +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key and a.part = '2' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + '2' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows -1 + partition_columns part + partition_columns.types string + rawDataSize -1 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns part + partition_columns.types string + serialization.ddl struct 
srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/part=1 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 2 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows -1 + partition_columns part + partition_columns.types string + rawDataSize -1 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/part=2 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key and a.part = '2' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key and a.part = '2' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +1028 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 drop partition (part = '1') +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 drop partition (part = '1') +POSTHOOK: type: ALTERTABLE_DROPPARTS +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: query: -- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' +-- and it is being joined. So, bucketed map-join can be performed +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' +-- and it is being joined. So, bucketed map-join can be performed +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows -1 + partition_columns part + partition_columns.types string + rawDataSize -1 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/part=1 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=2 + 
input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 2 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows -1 + partition_columns part + partition_columns.types string + rawDataSize -1 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/part=2 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM 
srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +1028 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (value) INTO 2 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (value) INTO 2 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: -- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' +-- and it is being joined. So, bucketed map-join can be performed +-- The fact that the table is being bucketed by 'value' does not matter +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- part=2 partition for srcbucket_mapjoin_part_1 is bucketed by 'key' +-- and it is being joined. So, bucketed map-join can be performed +-- The fact that the table is being bucketed by 'value' does not matter +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows -1 + partition_columns part + partition_columns.types string + rawDataSize -1 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/part=1 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=2 + 
input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 2 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows -1 + partition_columns part + partition_columns.types string + rawDataSize -1 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name value + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/part=2 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM 
srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +1028 diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out new file mode 100644 index 0000000..369b7fa --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out @@ -0,0 +1,1380 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part 
partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +PREHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on 
a.key=b.key and b.ds="2008-04-08" +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + b + ds + "2008-04-08" + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 3062 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + 
isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part + Truncated Path -> Alias: + /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin_part)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin_part)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +564 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select 
sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin_part)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin_part)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +564 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.key EXPRESSION 
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +0 0 0 +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08" +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + b + ds + "2008-04-08" + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . 
+ TOK_TABLE_OR_COL + b + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 3062 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part + Truncated Path -> Alias: + /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11067 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + 
columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11067 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin_part)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin_part)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +564 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION 
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin_part)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin_part)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +564 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: select a.key-b.key, 
a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +0 0 0 +PREHOOK: query: -- HIVE-3210 +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: -- HIVE-3210 +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . 
+ TOK_TABLE_OR_COL + b + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 3062 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 3062 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part + Truncated Path -> Alias: + /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: 
int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11067 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11067 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE 
[(srcbucket_mapjoin_part)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin_part)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +1128 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b +on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin_part)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin_part)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: 
bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +1128 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +0 0 0 diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out new file mode 100644 index 0000000..85ffbc9 --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out @@ -0,0 +1,942 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: 
Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: 
CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +PREHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08" +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + b + and + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + b + ds + "2008-04-08" + = + . 
+ TOK_TABLE_OR_COL + a + ds + "2008-04-08" + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part + Truncated Path -> Alias: + /srcbucket_mapjoin_part/ds=2008-04-08 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 15 Data size: 1583 Basic stats: 
COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 3062 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin_part_2)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin_part_2)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +564 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION 
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin_part_2)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin_part_2)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +564 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] 
+POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +0 0 0 +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08" +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + b + and + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + b + ds + "2008-04-08" + = + . + TOK_TABLE_OR_COL + a + ds + "2008-04-08" + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . 
+ TOK_TABLE_OR_COL + b + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part + Truncated Path -> Alias: + /srcbucket_mapjoin_part/ds=2008-04-08 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 3062 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11067 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + 
columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11067 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin_part_2)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin_part_2)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +564 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: 
bucketmapjoin_hash_result_2.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin_part_2)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin_part_2)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +564 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value2 EXPRESSION 
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +0 0 0 diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out new file mode 100644 index 0000000..d11b9f4 --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out @@ -0,0 +1,870 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE 
srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +PREHOOK: 
query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . 
+ TOK_TABLE_OR_COL + b + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: srcbucket_mapjoin + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin + Truncated Path -> Alias: + /srcbucket_mapjoin [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: srcbucket_mapjoin + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name 
key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin + Truncated Path -> Alias: + /srcbucket_mapjoin [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + + Stage: Stage-3 + Stats-Aggr 
Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +464 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: 
Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +464 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +0 0 0 +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + b + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: srcbucket_mapjoin + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin + Truncated Path -> Alias: + /srcbucket_mapjoin [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> 
Partition: +#### A masked pattern was here #### + Partition + base file name: srcbucket_mapjoin + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin + Truncated Path -> Alias: + /srcbucket_mapjoin [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 8983 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked 
pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 8983 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +464 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin 
+PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +464 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +0 0 0 diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out new file mode 100644 index 0000000..1bbc30c --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out @@ -0,0 +1,1050 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key 
int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern 
was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 
+PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +PREHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + b + = + . 
+ TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 110 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + 
rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part + Truncated Path -> Alias: + /srcbucket_mapjoin_part/ds=2008-04-08 [b] + /srcbucket_mapjoin_part/ds=2008-04-09 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: srcbucket_mapjoin + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin + Truncated Path -> Alias: + /srcbucket_mapjoin [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, 
_col1, _col6 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, 
comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +928 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-09 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: 
QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +928 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +0 0 0 +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . 
+ TOK_TABLE_OR_COL + b + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 3062 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 3062 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: srcbucket_mapjoin + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin + Truncated Path -> Alias: + /srcbucket_mapjoin [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column 
stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 17966 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 17966 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE 
[(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +0 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_1 +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: Output: default@bucketmapjoin_tmp_result +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +POSTHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +#### 
A masked pattern was here #### +0 +PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: default@bucketmapjoin_hash_result_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2 +select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: default@bucketmapjoin_hash_result_2 +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] +POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] +PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_hash_result_1 +PREHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 +from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b +on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_hash_result_1 +POSTHOOK: Input: default@bucketmapjoin_hash_result_2 +#### A masked pattern was here #### +NULL NULL NULL diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin6.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin6.q.out new file mode 100644 index 0000000..6f76566 --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucketmapjoin6.q.out @@ -0,0 +1,149 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table tmp1 (a string, b string) clustered by (a) sorted by (a) into 10 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table tmp1 (a string, b string) clustered by (a) sorted by (a) into 10 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp1 +PREHOOK: query: create table tmp2 (a string, b string) clustered by (a) sorted by (a) into 10 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp2 +POSTHOOK: query: create table tmp2 (a string, b string) clustered by (a) sorted by (a) into 10 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp2 +PREHOOK: query: insert overwrite table tmp1 select * from src where key < 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tmp1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tmp1 select * from src where key < 50 +POSTHOOK: type: QUERY 
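-- Editor's sketch (plain HiveQL, names taken verbatim from the output just above) of the
-- verification idiom the bucketmapjoin golden files encode: the same join is materialized
-- twice into bucketmapjoin_tmp_result, each run is reduced to order-independent hash sums,
-- and the two summaries are compared column-wise. Identical runs yield 0 0 0; here both
-- count(1) probes return 0, so each sum(hash(...)) over the empty table is NULL and the
-- differences print as NULL NULL NULL.
insert overwrite table bucketmapjoin_hash_result_1
select sum(hash(key)), sum(hash(value1)), sum(hash(value2))
from bucketmapjoin_tmp_result;
-- ... re-run the join under the other setting into bucketmapjoin_tmp_result, then:
insert overwrite table bucketmapjoin_hash_result_2
select sum(hash(key)), sum(hash(value1)), sum(hash(value2))
from bucketmapjoin_tmp_result;
-- any non-zero difference flags a mismatch between the two runs
select a.key - b.key, a.value1 - b.value1, a.value2 - b.value2
from bucketmapjoin_hash_result_1 a
left outer join bucketmapjoin_hash_result_2 b
on a.key = b.key;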
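-- The bucketmapjoin8/bucketmapjoin9 cases later in this diff probe which bucketing metadata
-- decides whether a bucket map join may be used: per the test comments there, it is the
-- per-partition metadata captured at load time, not the current table-level value, that must
-- match. A minimal sketch assembled from the DDL shown in those files (an ALTER after the
-- load leaves the existing partition at its original bucketing, which is why the plan output
-- can show table bucket_count 3 next to partition bucket_count 2):
CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING)
  PARTITIONED BY (part STRING)
  CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
-- load part='1' while the table declares 2 buckets; the partition records 2
LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt'
  INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1');
-- changing the table afterwards does not rewrite the existing partition's metadata
ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS;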
+POSTHOOK: Input: default@src +POSTHOOK: Output: default@tmp1 +POSTHOOK: Lineage: tmp1.a SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp1.b SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tmp2 select * from src where key < 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tmp2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tmp2 select * from src where key < 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tmp2 +POSTHOOK: Lineage: tmp2.a SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp2.b SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table tmp3 (a string, b string, c string) clustered by (a) sorted by (a) into 10 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp3 +POSTHOOK: query: create table tmp3 (a string, b string, c string) clustered by (a) sorted by (a) into 10 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp3 +PREHOOK: query: insert overwrite table tmp3 + select /*+ MAPJOIN(l) */ i.a, i.b, l.b + from tmp1 i join tmp2 l ON i.a = l.a +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp1 +PREHOOK: Input: default@tmp2 +PREHOOK: Output: default@tmp3 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tmp3 + select /*+ MAPJOIN(l) */ i.a, i.b, l.b + from tmp1 i join tmp2 l ON i.a = l.a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp1 +POSTHOOK: Input: default@tmp2 +POSTHOOK: Output: default@tmp3 +POSTHOOK: Lineage: tmp3.a SIMPLE [(tmp1)i.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: tmp3.b SIMPLE [(tmp1)i.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: tmp3.c SIMPLE [(tmp2)l.FieldSchema(name:b, type:string, comment:null), ] +PREHOOK: query: select * from tmp3 +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp3 +#### A masked pattern was here #### +POSTHOOK: query: select * from tmp3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp3 +#### A masked pattern was here #### +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +10 val_10 val_10 +11 val_11 val_11 +12 val_12 val_12 +12 val_12 val_12 +12 val_12 val_12 +12 val_12 val_12 +15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 +17 val_17 val_17 +18 val_18 val_18 +18 val_18 val_18 +18 val_18 val_18 +18 val_18 val_18 +19 val_19 val_19 +2 val_2 val_2 +20 val_20 val_20 +24 val_24 val_24 +24 val_24 val_24 +24 val_24 val_24 +24 val_24 val_24 +26 val_26 val_26 +26 val_26 val_26 +26 val_26 val_26 +26 val_26 val_26 +27 val_27 val_27 +28 val_28 val_28 +30 val_30 val_30 +33 val_33 val_33 +34 val_34 val_34 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +37 val_37 val_37 +37 val_37 val_37 +37 val_37 val_37 +37 val_37 val_37 +4 val_4 val_4 +41 val_41 val_41 +42 val_42 val_42 +42 val_42 
val_42 +42 val_42 val_42 +42 val_42 val_42 +43 val_43 val_43 +44 val_44 val_44 +47 val_47 val_47 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +8 val_8 val_8 +9 val_9 val_9 diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out new file mode 100644 index 0000000..045f615 --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out @@ -0,0 +1,345 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (ds='2008-04-08', hr='0') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (ds='2008-04-08', hr='0') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (ds='2008-04-08', hr='0') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (ds='2008-04-08', hr='0') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (ds='2008-04-08', hr='0') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (ds='2008-04-08', hr='0') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (ds='2008-04-08', 
hr='0') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (ds='2008-04-08', hr='0') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 +PREHOOK: query: -- Tests that bucket map join works with a table with more than one level of partitioning + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.ds = '2008-04-08' AND b.ds = '2008-04-08' LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Tests that bucket map join works with a table with more than one level of partitioning + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.ds = '2008-04-08' AND b.ds = '2008-04-08' LIMIT 1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + ds + '2008-04-08' + = + . + TOK_TABLE_OR_COL + b + ds + '2008-04-08' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_LIMIT + 1 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=0 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 0 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/ds=2008-04-08/hr=0 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=0 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 0 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows 0 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/ds=2008-04-08/hr=0 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col8 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col8 (type: string) + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.ds = '2008-04-08' AND b.ds = '2008-04-08' LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ a.key, b.value +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.ds = '2008-04-08' AND b.ds = '2008-04-08' LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@ds=2008-04-08/hr=0 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08/hr=0 +#### A masked pattern was here #### +0 val_0 diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out new file mode 100644 index 0000000..6d72fdf --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out @@ -0,0 +1,664 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +PREHOOK: type: LOAD +#### A 
masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: -- The partition bucketing metadata match but the tables have different numbers of buckets, bucket map join should still be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- The partition bucketing metadata match but the tables have different numbers of buckets, bucket map join should still be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + and + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + '1' + = + . 
+ TOK_TABLE_OR_COL + b + part + '1' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/part=1 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + 
input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/part=1 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM 
srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +464 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (value) INTO 2 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (value) INTO 2 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: -- The partition bucketing metadata match but the tables are bucketed on different columns, bucket map join should still be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- The partition bucketing metadata match but the tables are bucketed on different columns, bucket map join should still be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + and + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + '1' + = + . 
+ TOK_TABLE_OR_COL + b + part + '1' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name value + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/part=1 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + 
input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/part=1 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM 
srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +464 diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out new file mode 100644 index 0000000..d80bdcf --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out @@ -0,0 +1,705 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: -- The table bucketing metadata matches but the partitions have different numbers of buckets, bucket map join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- The table bucketing metadata matches but the partitions have different numbers of buckets, bucket map join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + and + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + '1' + = + . 
+ TOK_TABLE_OR_COL + b + part + '1' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1050 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 525 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 525 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 3 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/part=1 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + 
input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/part=1 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 577 Data size: 2310 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 577 Data size: 2310 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM 
srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' and b.part = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +464 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 DROP PARTITION (part='1') +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 DROP PARTITION (part='1') +POSTHOOK: type: ALTERTABLE_DROPPARTS +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (value) INTO 2 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (value) INTO 2 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: -- The table bucketing metadata matches but the partitions are bucketed on different columns, bucket map join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND 
a.part = '1' AND b.part = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- The table bucketing metadata matches but the partitions are bucketed on different columns, bucket map join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + '1' + = + . + TOK_TABLE_OR_COL + b + part + '1' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name value + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/part=1 [b] + Map 4 + Map 
Operator Tree: + TableScan + alias: a + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows 0 + partition_columns part + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/part=1 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 
Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +464 diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out new file mode 100644 index 0000000..85c51ff --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out @@ -0,0 +1,353 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE 
+POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + = + . 
+ TOK_TABLE_OR_COL + b + ds + "2008-04-08" + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + numFiles 3 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part + Truncated Path -> Alias: + /srcbucket_mapjoin_part/ds=2008-04-08 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: srcbucket_mapjoin + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin + Truncated Path -> Alias: + /srcbucket_mapjoin [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out new file mode 100644 index 0000000..1fbcc48 --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out @@ -0,0 +1,403 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: load data local inpath 
'../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09 +PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + bucketmapjoin_tmp_result + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . 
+ TOK_TABLE_OR_COL + b + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 3062 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 3062 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: srcbucket_mapjoin + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin + numFiles 2 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin + Truncated Path -> Alias: + /srcbucket_mapjoin [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column 
stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin_negative3.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin_negative3.q.out new file mode 100644 index 0000000..cec3da5 --- /dev/null +++ ql/src/test/results/clientpositive/spark/bucketmapjoin_negative3.q.out @@ -0,0 +1,2269 @@ +PREHOOK: query: drop table test1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table test1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table test2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table test2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table test3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table test3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table test4 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table test4 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table test1 (key string, value string) clustered by (key) sorted by (key) into 3 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test1 +POSTHOOK: query: create table test1 (key string, value string) clustered by (key) sorted by (key) into 3 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test1 +PREHOOK: query: create table test2 (key string, value string) clustered by (value) sorted by (value) into 3 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test2 +POSTHOOK: query: create table test2 (key string, value string) clustered by (value) sorted by (value) into 3 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test2 +PREHOOK: query: create table test3 (key 
string, value string) clustered by (key, value) sorted by (key, value) into 3 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test3 +POSTHOOK: query: create table test3 (key string, value string) clustered by (key, value) sorted by (key, value) into 3 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test3 +PREHOOK: query: create table test4 (key string, value string) clustered by (value, key) sorted by (value, key) into 3 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test4 +POSTHOOK: query: create table test4 (key string, value string) clustered by (value, key) sorted by (value, key) into 3 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test4 +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE test1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@test1 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE test1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@test1 +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE test1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@test1 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE test1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@test1 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE test1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@test1 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE test1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@test1 +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE test2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@test2 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE test2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@test2 +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE test2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@test2 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE test2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@test2 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE test2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@test2 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE test2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@test2 +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE test3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@test3 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE test3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@test3 +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE test3 +PREHOOK: type: LOAD +#### A masked 
pattern was here #### +PREHOOK: Output: default@test3 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE test3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@test3 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE test3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@test3 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE test3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@test3 +PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE test4 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@test4 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE test4 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@test4 +PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE test4 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@test4 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE test4 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@test4 +PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE test4 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@test4 +POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE test4 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@test4 +PREHOOK: query: -- should be allowed +explain extended select /* + MAPJOIN(R) */ * from test1 L join test1 R on L.key=R.key AND L.value=R.value +PREHOOK: type: QUERY +POSTHOOK: query: -- should be allowed +explain extended select /* + MAPJOIN(R) */ * from test1 L join test1 R on L.key=R.key AND L.value=R.value +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test1 + L + TOK_TABREF + TOK_TABNAME + test1 + R + AND + = + . + TOK_TABLE_OR_COL + L + key + . + TOK_TABLE_OR_COL + R + key + = + . + TOK_TABLE_OR_COL + L + value + . 
+ TOK_TABLE_OR_COL + R + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + R + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: r + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test1 + numFiles 3 + serialization.ddl struct test1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test1 + numFiles 3 + serialization.ddl struct test1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test1 + name: default.test1 + Truncated Path -> Alias: + /test1 [r] + Map 3 + Map Operator Tree: + TableScan + alias: l + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test1 + numFiles 3 + serialization.ddl struct test1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test1 + numFiles 3 + serialization.ddl struct test1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test1 + name: default.test1 + Truncated Path -> Alias: + /test1 [l] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test2 L join test2 R on L.key=R.key AND L.value=R.value +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test2 L join test2 R on L.key=R.key AND L.value=R.value +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test2 + L + TOK_TABREF + TOK_TABNAME + test2 + R + AND + = + . + TOK_TABLE_OR_COL + L + key + . + TOK_TABLE_OR_COL + R + key + = + . + TOK_TABLE_OR_COL + L + value + . 
+ TOK_TABLE_OR_COL + R + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + R + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: r + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test2 + numFiles 3 + serialization.ddl struct test2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test2 + numFiles 3 + serialization.ddl struct test2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test2 + name: default.test2 + Truncated Path -> Alias: + /test2 [r] + Map 3 + Map Operator Tree: + TableScan + alias: l + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test2 + numFiles 3 + serialization.ddl struct test2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test2 + numFiles 3 + serialization.ddl struct test2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test2 + name: default.test2 + Truncated Path -> Alias: + /test2 [l] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- should not apply bucket mapjoin +explain extended select /* + MAPJOIN(R) */ * from test1 L join test1 R on L.key+L.key=R.key +PREHOOK: type: QUERY +POSTHOOK: query: -- should not apply bucket mapjoin +explain extended select /* + MAPJOIN(R) */ * from test1 L join test1 R on L.key+L.key=R.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test1 + L + TOK_TABREF + TOK_TABNAME + test1 + R + = + + + . + TOK_TABLE_OR_COL + L + key + . + TOK_TABLE_OR_COL + L + key + . 
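NOTE: This is the negative case flagged by the "-- should not apply bucket mapjoin" comment above: the left-hand join key is the expression L.key + L.key rather than the raw bucketing column, so bucket alignment between the two sides no longer holds. The plan below shows the consequence: the shuffle keys become computed values, UDFToDouble(key) on one side and (key + key) on the other. For contrast, under the same assumed fixtures (the first query is illustrative and not part of this file; the second is the one explained below):

    -- Join on the raw bucket column: buckets on both sides can line up 1:1.
    select /* + MAPJOIN(R) */ * from test1 L join test1 R on L.key = R.key;
    -- Join on an expression over the bucket column: alignment is lost,
    -- so a bucket map join cannot be applied.
    select /* + MAPJOIN(R) */ * from test1 L join test1 R on L.key + L.key = R.key;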
+ TOK_TABLE_OR_COL + R + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + R + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: r + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 11 Data size: 2200 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: string), value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test1 + numFiles 3 + serialization.ddl struct test1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test1 + numFiles 3 + serialization.ddl struct test1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test1 + name: default.test1 + Truncated Path -> Alias: + /test1 [r] + Map 3 + Map Operator Tree: + TableScan + alias: l + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key + key) is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (key + key) (type: double) + sort order: + + Map-reduce partition columns: (key + key) (type: double) + Statistics: Num rows: 11 Data size: 2200 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string), value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test1 + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test1 + numFiles 3 + serialization.ddl struct test1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test1 + numFiles 3 + serialization.ddl struct test1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test1 + name: default.test1 + Truncated Path -> Alias: + /test1 [l] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 12 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 12 Data size: 2420 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test1 L join test2 R on L.key=R.key AND L.value=R.value +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test1 L join test2 R on L.key=R.key AND L.value=R.value +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test1 + L + TOK_TABREF + TOK_TABNAME + test2 + R + AND + = + . + TOK_TABLE_OR_COL + L + key + . + TOK_TABLE_OR_COL + R + key + = + . + TOK_TABLE_OR_COL + L + value + . 
+ TOK_TABLE_OR_COL + R + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + R + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: r + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test2 + numFiles 3 + serialization.ddl struct test2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test2 + numFiles 3 + serialization.ddl struct test2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test2 + name: default.test2 + Truncated Path -> Alias: + /test2 [r] + Map 3 + Map Operator Tree: + TableScan + alias: l + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test1 + numFiles 3 + serialization.ddl struct test1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test1 + numFiles 3 + serialization.ddl struct test1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test1 + name: default.test1 + Truncated Path -> Alias: + /test1 [l] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test1 L join test3 R on L.key=R.key AND L.value=R.value +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test1 L join test3 R on L.key=R.key AND L.value=R.value +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test1 + L + TOK_TABREF + TOK_TABNAME + test3 + R + AND + = + . + TOK_TABLE_OR_COL + L + key + . + TOK_TABLE_OR_COL + R + key + = + . + TOK_TABLE_OR_COL + L + value + . 
+ TOK_TABLE_OR_COL + R + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + R + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: r + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test3 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test3 + numFiles 3 + serialization.ddl struct test3 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test3 + numFiles 3 + serialization.ddl struct test3 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test3 + name: default.test3 + Truncated Path -> Alias: + /test3 [r] + Map 3 + Map Operator Tree: + TableScan + alias: l + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test1 + numFiles 3 + serialization.ddl struct test1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test1 + numFiles 3 + serialization.ddl struct test1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test1 + name: default.test1 + Truncated Path -> Alias: + /test1 [l] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test1 L join test4 R on L.key=R.key AND L.value=R.value +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test1 L join test4 R on L.key=R.key AND L.value=R.value +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test1 + L + TOK_TABREF + TOK_TABNAME + test4 + R + AND + = + . + TOK_TABLE_OR_COL + L + key + . + TOK_TABLE_OR_COL + R + key + = + . + TOK_TABLE_OR_COL + L + value + . 
+ TOK_TABLE_OR_COL + R + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + R + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: r + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test4 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test4 + numFiles 3 + serialization.ddl struct test4 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test4 + numFiles 3 + serialization.ddl struct test4 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test4 + name: default.test4 + Truncated Path -> Alias: + /test4 [r] + Map 3 + Map Operator Tree: + TableScan + alias: l + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test1 + numFiles 3 + serialization.ddl struct test1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test1 + numFiles 3 + serialization.ddl struct test1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test1 + name: default.test1 + Truncated Path -> Alias: + /test1 [l] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test2 L join test3 R on L.key=R.key AND L.value=R.value +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test2 L join test3 R on L.key=R.key AND L.value=R.value +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test2 + L + TOK_TABREF + TOK_TABNAME + test3 + R + AND + = + . + TOK_TABLE_OR_COL + L + key + . + TOK_TABLE_OR_COL + R + key + = + . + TOK_TABLE_OR_COL + L + value + . 
+ TOK_TABLE_OR_COL + R + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + R + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: r + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test3 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test3 + numFiles 3 + serialization.ddl struct test3 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test3 + numFiles 3 + serialization.ddl struct test3 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test3 + name: default.test3 + Truncated Path -> Alias: + /test3 [r] + Map 3 + Map Operator Tree: + TableScan + alias: l + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test2 + numFiles 3 + serialization.ddl struct test2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test2 + numFiles 3 + serialization.ddl struct test2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test2 + name: default.test2 + Truncated Path -> Alias: + /test2 [l] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test2 L join test4 R on L.key=R.key AND L.value=R.value +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test2 L join test4 R on L.key=R.key AND L.value=R.value +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test2 + L + TOK_TABREF + TOK_TABNAME + test4 + R + AND + = + . + TOK_TABLE_OR_COL + L + key + . + TOK_TABLE_OR_COL + R + key + = + . + TOK_TABLE_OR_COL + L + value + . 
+ TOK_TABLE_OR_COL + R + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + R + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: r + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test4 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test4 + numFiles 3 + serialization.ddl struct test4 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test4 + numFiles 3 + serialization.ddl struct test4 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test4 + name: default.test4 + Truncated Path -> Alias: + /test4 [r] + Map 3 + Map Operator Tree: + TableScan + alias: l + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test2 + numFiles 3 + serialization.ddl struct test2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test2 + numFiles 3 + serialization.ddl struct test2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test2 + name: default.test2 + Truncated Path -> Alias: + /test2 [l] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test3 L join test4 R on L.key=R.key AND L.value=R.value +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select /* + MAPJOIN(R) */ * from test3 L join test4 R on L.key=R.key AND L.value=R.value +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test3 + L + TOK_TABREF + TOK_TABNAME + test4 + R + AND + = + . + TOK_TABLE_OR_COL + L + key + . + TOK_TABLE_OR_COL + R + key + = + . + TOK_TABLE_OR_COL + L + value + . 
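NOTE: A different mismatch closes the series: test3 is bucketed by key while test4 is bucketed by value (see bucket_field_name in the plan below), so although both sides use bucket_count 3, the join on (key, value) cannot pair buckets one-to-one. An illustrative query (not part of this file) makes that visible, assuming Hive's usual bucket assignment of hash(bucket column) mod bucket count:

    -- Illustrative only: the same row generally lands in unrelated buckets of the
    -- two tables, because each table hashes a different column into its buckets.
    SELECT key, value,
           pmod(hash(key), 3)   AS bucket_in_test3,
           pmod(hash(value), 3) AS bucket_in_test4
    FROM test3;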
+ TOK_TABLE_OR_COL + R + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + R + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: r + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test4 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test4 + numFiles 3 + serialization.ddl struct test4 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name value + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test4 + numFiles 3 + serialization.ddl struct test4 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test4 + name: default.test4 + Truncated Path -> Alias: + /test4 [r] + Map 3 + Map Operator Tree: + TableScan + alias: l + Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test3 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test3 + numFiles 3 + serialization.ddl struct test3 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.test3 + numFiles 3 + serialization.ddl struct test3 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 4200 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test3 + name: default.test3 + Truncated Path -> Alias: + /test3 [l] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/cross_join.q.out ql/src/test/results/clientpositive/spark/cross_join.q.out new file mode 100644 index 0000000..4a7f6a7 --- /dev/null +++ ql/src/test/results/clientpositive/spark/cross_join.q.out @@ -0,0 +1,195 @@ +PREHOOK: query: -- current +explain select src.key from src join src src2 +PREHOOK: type: QUERY +POSTHOOK: query: -- current +explain select src.key from src join src src2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + 
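NOTE: The new cross_join.q.out golden file locks in three plans. The first two are a plain Cartesian product: no join keys, an empty sort order on both Reduce Output Operators, and a single shuffle into one reducer. Both spellings compile to the identical plan, which is what this output asserts:

    -- Verbatim from the queries in this file; both produce the same plan.
    explain select src.key from src join src src2;
    explain select src.key from src cross join src src2;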
alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- ansi cross join +explain select src.key from src cross join src src2 +PREHOOK: type: QUERY +POSTHOOK: query: -- ansi cross join +explain select src.key from src cross join src src2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- appending condition is allowed +explain select src.key from src cross join src src2 on src.key=src2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- appending condition is allowed +explain select src.key from src cross join src src2 on src.key=src2.key +POSTHOOK: type: QUERY +STAGE 
DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/decimal_join.q.out ql/src/test/results/clientpositive/spark/decimal_join.q.out new file mode 100644 index 0000000..940ffc5 --- /dev/null +++ ql/src/test/results/clientpositive/spark/decimal_join.q.out @@ -0,0 +1,80 @@ +PREHOOK: query: -- HIVE-5292 Join on decimal columns fails + +create table src_dec (key decimal(3,0), value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_dec +POSTHOOK: query: -- HIVE-5292 Join on decimal columns fails + +create table src_dec (key decimal(3,0), value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_dec +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table src_dec +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@src_dec +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table src_dec +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@src_dec +PREHOOK: query: select * from src_dec a join src_dec b on a.key=b.key+450 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_dec +#### A masked pattern was here #### +POSTHOOK: query: select * from src_dec a join src_dec b on a.key=b.key+450 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_dec +#### A masked pattern was here #### +452 val_452 2 val_2 +454 val_454 4 val_4 +454 
val_454 4 val_4 +454 val_454 4 val_4 +455 val_455 5 val_5 +455 val_455 5 val_5 +455 val_455 5 val_5 +458 val_458 8 val_8 +458 val_458 8 val_8 +459 val_459 9 val_9 +459 val_459 9 val_9 +460 val_460 10 val_10 +462 val_462 12 val_12 +462 val_462 12 val_12 +462 val_462 12 val_12 +462 val_462 12 val_12 +467 val_467 17 val_17 +468 val_468 18 val_18 +468 val_468 18 val_18 +468 val_468 18 val_18 +468 val_468 18 val_18 +468 val_468 18 val_18 +468 val_468 18 val_18 +468 val_468 18 val_18 +468 val_468 18 val_18 +469 val_469 19 val_19 +469 val_469 19 val_19 +469 val_469 19 val_19 +469 val_469 19 val_19 +469 val_469 19 val_19 +470 val_470 20 val_20 +477 val_477 27 val_27 +478 val_478 28 val_28 +478 val_478 28 val_28 +480 val_480 30 val_30 +480 val_480 30 val_30 +480 val_480 30 val_30 +483 val_483 33 val_33 +484 val_484 34 val_34 +485 val_485 35 val_35 +485 val_485 35 val_35 +485 val_485 35 val_35 +487 val_487 37 val_37 +487 val_487 37 val_37 +491 val_491 41 val_41 +492 val_492 42 val_42 +492 val_492 42 val_42 +492 val_492 42 val_42 +492 val_492 42 val_42 +493 val_493 43 val_43 +494 val_494 44 val_44 +497 val_497 47 val_47 diff --git ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out new file mode 100644 index 0000000..d5f02be --- /dev/null +++ ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out @@ -0,0 +1,435 @@ +PREHOOK: query: CREATE TABLE filter_join_breaktask(key int, value string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@filter_join_breaktask +POSTHOOK: query: CREATE TABLE filter_join_breaktask(key int, value string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@filter_join_breaktask +PREHOOK: query: INSERT OVERWRITE TABLE filter_join_breaktask PARTITION(ds='2008-04-08') +SELECT key, value from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@filter_join_breaktask@ds=2008-04-08 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE filter_join_breaktask PARTITION(ds='2008-04-08') +SELECT key, value from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@filter_join_breaktask@ds=2008-04-08 +POSTHOOK: Lineage: filter_join_breaktask PARTITION(ds=2008-04-08).key EXPRESSION [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: filter_join_breaktask PARTITION(ds=2008-04-08).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: EXPLAIN EXTENDED +SELECT f.key, g.value +FROM filter_join_breaktask f JOIN filter_join_breaktask m ON( f.key = m.key AND f.ds='2008-04-08' AND m.ds='2008-04-08' AND f.key is not null) +JOIN filter_join_breaktask g ON(g.value = m.value AND g.ds='2008-04-08' AND m.ds='2008-04-08' AND m.value is not null AND m.value !='') +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +SELECT f.key, g.value +FROM filter_join_breaktask f JOIN filter_join_breaktask m ON( f.key = m.key AND f.ds='2008-04-08' AND m.ds='2008-04-08' AND f.key is not null) +JOIN filter_join_breaktask g ON(g.value = m.value AND g.ds='2008-04-08' AND m.ds='2008-04-08' AND m.value is not null AND m.value !='') +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + 
TOK_FROM + TOK_JOIN + TOK_JOIN + TOK_TABREF + TOK_TABNAME + filter_join_breaktask + f + TOK_TABREF + TOK_TABNAME + filter_join_breaktask + m + AND + AND + AND + = + . + TOK_TABLE_OR_COL + f + key + . + TOK_TABLE_OR_COL + m + key + = + . + TOK_TABLE_OR_COL + f + ds + '2008-04-08' + = + . + TOK_TABLE_OR_COL + m + ds + '2008-04-08' + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + f + key + TOK_TABREF + TOK_TABNAME + filter_join_breaktask + g + AND + AND + AND + AND + = + . + TOK_TABLE_OR_COL + g + value + . + TOK_TABLE_OR_COL + m + value + = + . + TOK_TABLE_OR_COL + g + ds + '2008-04-08' + = + . + TOK_TABLE_OR_COL + m + ds + '2008-04-08' + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + m + value + != + . + TOK_TABLE_OR_COL + m + value + '' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + f + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + g + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 59 Data size: 236 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 30 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 30 Data size: 120 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.filter_join_breaktask + numFiles 1 + numRows -1 + partition_columns ds + partition_columns.types string + rawDataSize -1 + serialization.ddl struct filter_join_breaktask { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 236 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.filter_join_breaktask + partition_columns ds + partition_columns.types string + serialization.ddl struct filter_join_breaktask { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.filter_join_breaktask + name: default.filter_join_breaktask + Truncated Path -> Alias: + /filter_join_breaktask/ds=2008-04-08 [f] + 
Map 4 + Map Operator Tree: + TableScan + alias: g + Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((value <> '') and value is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.filter_join_breaktask + numFiles 1 + numRows -1 + partition_columns ds + partition_columns.types string + rawDataSize -1 + serialization.ddl struct filter_join_breaktask { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 236 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.filter_join_breaktask + partition_columns ds + partition_columns.types string + serialization.ddl struct filter_join_breaktask { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.filter_join_breaktask + name: default.filter_join_breaktask + Truncated Path -> Alias: + /filter_join_breaktask/ds=2008-04-08 [g] + Map 5 + Map Operator Tree: + TableScan + alias: m + Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key is not null and value is not null) and (value <> '')) (type: boolean) + Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.filter_join_breaktask + numFiles 1 + numRows -1 + partition_columns ds + partition_columns.types string + rawDataSize -1 + 
serialization.ddl struct filter_join_breaktask { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 236 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.filter_join_breaktask + partition_columns ds + partition_columns.types string + serialization.ddl struct filter_join_breaktask { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.filter_join_breaktask + name: default.filter_join_breaktask + Truncated Path -> Alias: + /filter_join_breaktask/ds=2008-04-08 [m] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col7 + Statistics: Num rows: 33 Data size: 132 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col7 (type: string) + sort order: + + Map-reduce partition columns: _col7 (type: string) + Statistics: Num rows: 33 Data size: 132 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col0 (type: int) + auto parallelism: true + Reducer 3 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col13 + Statistics: Num rows: 36 Data size: 145 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col13 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 145 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 36 Data size: 145 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT f.key, g.value +FROM filter_join_breaktask f JOIN filter_join_breaktask m ON( f.key = m.key AND f.ds='2008-04-08' AND m.ds='2008-04-08' AND f.key is not null) +JOIN filter_join_breaktask g ON(g.value = m.value AND g.ds='2008-04-08' AND m.ds='2008-04-08' AND m.value is not null AND m.value !='') +PREHOOK: type: QUERY +PREHOOK: Input: default@filter_join_breaktask +PREHOOK: Input: default@filter_join_breaktask@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: SELECT f.key, g.value +FROM filter_join_breaktask f JOIN filter_join_breaktask m ON( f.key = m.key AND 
f.ds='2008-04-08' AND m.ds='2008-04-08' AND f.key is not null) +JOIN filter_join_breaktask g ON(g.value = m.value AND g.ds='2008-04-08' AND m.ds='2008-04-08' AND m.value is not null AND m.value !='') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@filter_join_breaktask +POSTHOOK: Input: default@filter_join_breaktask@ds=2008-04-08 +#### A masked pattern was here #### +146 val_146 +150 val_150 +213 val_213 +238 val_238 +255 val_255 +273 val_273 +278 val_278 +311 val_311 +401 val_401 +406 val_406 +66 val_66 +98 val_98 diff --git ql/src/test/results/clientpositive/spark/filter_join_breaktask2.q.out ql/src/test/results/clientpositive/spark/filter_join_breaktask2.q.out new file mode 100644 index 0000000..c2daf8c --- /dev/null +++ ql/src/test/results/clientpositive/spark/filter_join_breaktask2.q.out @@ -0,0 +1,271 @@ +PREHOOK: query: create table T1(c1 string, c2 string, c3 string, c4 string, c5 string, c6 string, c7 string) +partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: create table T1(c1 string, c2 string, c3 string, c4 string, c5 string, c6 string, c7 string) +partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: create table T2(c1 string, c2 string, c3 string, c0 string, c4 string, c5 string, c6 string, c7 string, c8 string, c9 string, c10 string, c11 string, c12 string, c13 string, c14 string, c15 string, c16 string, c17 string, c18 string, c19 string, c20 string, c21 string, c22 string, c23 string, c24 string, c25 string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: create table T2(c1 string, c2 string, c3 string, c0 string, c4 string, c5 string, c6 string, c7 string, c8 string, c9 string, c10 string, c11 string, c12 string, c13 string, c14 string, c15 string, c16 string, c17 string, c18 string, c19 string, c20 string, c21 string, c22 string, c23 string, c24 string, c25 string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: create table T3 (c0 bigint, c1 bigint, c2 int) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +POSTHOOK: query: create table T3 (c0 bigint, c1 bigint, c2 int) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: create table T4 (c0 bigint, c1 string, c2 string, c3 string, c4 string, c5 string, c6 string, c7 string, c8 string, c9 string, c10 string, c11 string, c12 string, c13 string, c14 string, c15 string, c16 string, c17 string, c18 string, c19 string, c20 string, c21 string, c22 string, c23 string, c24 string, c25 string, c26 string, c27 string, c28 string, c29 string, c30 string, c31 string, c32 string, c33 string, c34 string, c35 string, c36 string, c37 string, c38 string, c39 string, c40 string, c41 string, c42 string, c43 string, c44 string, c45 string, c46 string, c47 string, c48 string, c49 string, c50 string, c51 string, c52 string, c53 string, c54 string, c55 string, c56 string, c57 string, c58 string, c59 string, c60 string, c61 string, c62 string, c63 string, c64 string, c65 string, c66 string, c67 bigint, c68 string, c69 string, c70 bigint, c71 bigint, c72 bigint, c73 string, c74 string, c75 string, c76 string, c77 string, c78 string, c79 string, 
c80 string, c81 bigint, c82 bigint, c83 bigint) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T4 +POSTHOOK: query: create table T4 (c0 bigint, c1 string, c2 string, c3 string, c4 string, c5 string, c6 string, c7 string, c8 string, c9 string, c10 string, c11 string, c12 string, c13 string, c14 string, c15 string, c16 string, c17 string, c18 string, c19 string, c20 string, c21 string, c22 string, c23 string, c24 string, c25 string, c26 string, c27 string, c28 string, c29 string, c30 string, c31 string, c32 string, c33 string, c34 string, c35 string, c36 string, c37 string, c38 string, c39 string, c40 string, c41 string, c42 string, c43 string, c44 string, c45 string, c46 string, c47 string, c48 string, c49 string, c50 string, c51 string, c52 string, c53 string, c54 string, c55 string, c56 string, c57 string, c58 string, c59 string, c60 string, c61 string, c62 string, c63 string, c64 string, c65 string, c66 string, c67 bigint, c68 string, c69 string, c70 bigint, c71 bigint, c72 bigint, c73 string, c74 string, c75 string, c76 string, c77 string, c78 string, c79 string, c80 string, c81 bigint, c82 bigint, c83 bigint) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T4 +PREHOOK: query: insert overwrite table T1 partition (ds='2010-04-17') select '5', '1', '1', '1', 0, 0,4 from src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t1@ds=2010-04-17 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table T1 partition (ds='2010-04-17') select '5', '1', '1', '1', 0, 0,4 from src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t1@ds=2010-04-17 +POSTHOOK: Lineage: t1 PARTITION(ds=2010-04-17).c1 SIMPLE [] +POSTHOOK: Lineage: t1 PARTITION(ds=2010-04-17).c2 SIMPLE [] +POSTHOOK: Lineage: t1 PARTITION(ds=2010-04-17).c3 SIMPLE [] +POSTHOOK: Lineage: t1 PARTITION(ds=2010-04-17).c4 SIMPLE [] +POSTHOOK: Lineage: t1 PARTITION(ds=2010-04-17).c5 SIMPLE [] +POSTHOOK: Lineage: t1 PARTITION(ds=2010-04-17).c6 SIMPLE [] +POSTHOOK: Lineage: t1 PARTITION(ds=2010-04-17).c7 SIMPLE [] +PREHOOK: query: insert overwrite table T2 partition(ds='2010-04-17') select '5','name', NULL, '2', 'kavin',NULL, '9', 'c', '8', '0', '0', '7', '1','2', '0', '3','2', NULL, '1', NULL, '3','2','0','0','5','10' from src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t2@ds=2010-04-17 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table T2 partition(ds='2010-04-17') select '5','name', NULL, '2', 'kavin',NULL, '9', 'c', '8', '0', '0', '7', '1','2', '0', '3','2', NULL, '1', NULL, '3','2','0','0','5','10' from src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t2@ds=2010-04-17 +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c0 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c1 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c10 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c11 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c12 SIMPLE [] +POSTHOOK: Lineage: 
t2 PARTITION(ds=2010-04-17).c13 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c14 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c15 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c16 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c17 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c18 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c19 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c2 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c20 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c21 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c22 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c23 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c24 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c25 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c3 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c4 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c5 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c6 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c7 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c8 SIMPLE [] +POSTHOOK: Lineage: t2 PARTITION(ds=2010-04-17).c9 SIMPLE [] +PREHOOK: query: insert overwrite table T3 partition (ds='2010-04-17') select 4,5,0 from src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t3@ds=2010-04-17 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table T3 partition (ds='2010-04-17') select 4,5,0 from src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t3@ds=2010-04-17 +POSTHOOK: Lineage: t3 PARTITION(ds=2010-04-17).c0 EXPRESSION [] +POSTHOOK: Lineage: t3 PARTITION(ds=2010-04-17).c1 EXPRESSION [] +POSTHOOK: Lineage: t3 PARTITION(ds=2010-04-17).c2 SIMPLE [] +PREHOOK: query: insert overwrite table T4 partition(ds='2010-04-17') +select 4,'1','1','8','4','5','1','0','9','U','2','2', '0','2','1','1','J','C','A','U', '2','s', '2',NULL, NULL, NULL,NULL, NULL, NULL,'1','j', 'S', '6',NULL,'1', '2', 'J', 'g', '1', 'e', '2', '1', '2', 'U', 'P', 'p', '3', '0', '0', '0', '1', '1', '1', '0', '0', '0', '6', '2', 'j',NULL, NULL, NULL,NULL,NULL, NULL, '5',NULL, 'j', 'j', 2, 2, 1, '2', '2', '1', '1', '1', '1', '1', '1', 1, 1, 32,NULL from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t4@ds=2010-04-17 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table T4 partition(ds='2010-04-17') +select 4,'1','1','8','4','5','1','0','9','U','2','2', '0','2','1','1','J','C','A','U', '2','s', '2',NULL, NULL, NULL,NULL, NULL, NULL,'1','j', 'S', '6',NULL,'1', '2', 'J', 'g', '1', 'e', '2', '1', '2', 'U', 'P', 'p', '3', '0', '0', '0', '1', '1', '1', '0', '0', '0', '6', '2', 'j',NULL, NULL, NULL,NULL,NULL, NULL, '5',NULL, 'j', 'j', 2, 2, 1, '2', '2', '1', '1', '1', '1', '1', '1', 1, 1, 32,NULL from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t4@ds=2010-04-17 +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c0 EXPRESSION [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c1 SIMPLE [] +POSTHOOK: Lineage: t4 
PARTITION(ds=2010-04-17).c10 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c11 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c12 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c13 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c14 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c15 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c16 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c17 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c18 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c19 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c2 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c20 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c21 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c22 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c23 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c24 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c25 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c26 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c27 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c28 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c29 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c3 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c30 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c31 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c32 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c33 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c34 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c35 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c36 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c37 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c38 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c39 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c4 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c40 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c41 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c42 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c43 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c44 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c45 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c46 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c47 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c48 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c49 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c5 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c50 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c51 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c52 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c53 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c54 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c55 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c56 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c57 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c58 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c59 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c6 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c60 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c61 SIMPLE [] +POSTHOOK: Lineage: t4 
PARTITION(ds=2010-04-17).c62 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c63 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c64 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c65 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c66 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c67 EXPRESSION [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c68 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c69 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c7 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c70 EXPRESSION [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c71 EXPRESSION [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c72 EXPRESSION [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c73 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c74 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c75 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c76 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c77 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c78 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c79 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c8 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c80 SIMPLE [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c81 EXPRESSION [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c82 EXPRESSION [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c83 EXPRESSION [] +POSTHOOK: Lineage: t4 PARTITION(ds=2010-04-17).c9 SIMPLE [] +PREHOOK: query: select * from T2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t2@ds=2010-04-17 +#### A masked pattern was here #### +POSTHOOK: query: select * from T2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t2@ds=2010-04-17 +#### A masked pattern was here #### +5 name NULL 2 kavin NULL 9 c 8 0 0 7 1 2 0 3 2 NULL 1 NULL 3 2 0 0 5 10 2010-04-17 +PREHOOK: query: select * from T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@ds=2010-04-17 +#### A masked pattern was here #### +POSTHOOK: query: select * from T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@ds=2010-04-17 +#### A masked pattern was here #### +5 1 1 1 0 0 4 2010-04-17 +PREHOOK: query: select * from T3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t3 +PREHOOK: Input: default@t3@ds=2010-04-17 +#### A masked pattern was here #### +POSTHOOK: query: select * from T3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t3 +POSTHOOK: Input: default@t3@ds=2010-04-17 +#### A masked pattern was here #### +4 5 0 2010-04-17 +PREHOOK: query: select * from T4 +PREHOOK: type: QUERY +PREHOOK: Input: default@t4 +PREHOOK: Input: default@t4@ds=2010-04-17 +#### A masked pattern was here #### +POSTHOOK: query: select * from T4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t4 +POSTHOOK: Input: default@t4@ds=2010-04-17 +#### A masked pattern was here #### +4 1 1 8 4 5 1 0 9 U 2 2 0 2 1 1 J C A U 2 s 2 NULL NULL NULL NULL NULL NULL 1 j S 6 NULL 1 2 J g 1 e 2 1 2 U P p 3 0 0 0 1 1 1 0 0 0 6 2 j NULL NULL NULL NULL NULL NULL 5 NULL NULL j 2 2 1 2 2 1 1 1 1 1 1 1 1 32 NULL 2010-04-17 +PREHOOK: query: SELECT a.c1 as a_c1, b.c1 b_c1, d.c0 as d_c0 +FROM T1 a JOIN T2 b + ON (a.c1 = b.c1 AND a.ds='2010-04-17' AND b.ds='2010-04-17') + JOIN T3 c + ON (a.c1 = c.c1 AND a.ds='2010-04-17' AND c.ds='2010-04-17') + JOIN T4 d + ON (c.c0 = d.c0 AND c.ds='2010-04-17' AND d.ds='2010-04-17') +PREHOOK: type: QUERY 
+PREHOOK: Input: default@t1 +PREHOOK: Input: default@t1@ds=2010-04-17 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t2@ds=2010-04-17 +PREHOOK: Input: default@t3 +PREHOOK: Input: default@t3@ds=2010-04-17 +PREHOOK: Input: default@t4 +PREHOOK: Input: default@t4@ds=2010-04-17 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.c1 as a_c1, b.c1 b_c1, d.c0 as d_c0 +FROM T1 a JOIN T2 b + ON (a.c1 = b.c1 AND a.ds='2010-04-17' AND b.ds='2010-04-17') + JOIN T3 c + ON (a.c1 = c.c1 AND a.ds='2010-04-17' AND c.ds='2010-04-17') + JOIN T4 d + ON (c.c0 = d.c0 AND c.ds='2010-04-17' AND d.ds='2010-04-17') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t1@ds=2010-04-17 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t2@ds=2010-04-17 +POSTHOOK: Input: default@t3 +POSTHOOK: Input: default@t3@ds=2010-04-17 +POSTHOOK: Input: default@t4 +POSTHOOK: Input: default@t4@ds=2010-04-17 +#### A masked pattern was here #### +5 5 4 diff --git ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out new file mode 100644 index 0000000..537847b --- /dev/null +++ ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out @@ -0,0 +1,215 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- try the query without indexing, with manual indexing, and with automatic indexing + +EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +PREHOOK: type: QUERY +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- try the query without indexing, with manual indexing, and with automatic indexing + +EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((value is not null and (key > 80)) and (key < 100)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col5 + Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: _col0 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +82 82 +83 83 +83 83 +83 83 +83 83 +84 84 +84 84 +84 84 +84 84 +85 85 +86 86 +87 87 +PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +PREHOOK: Input: default@src +POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src_index__ +PREHOOK: query: ALTER INDEX src_index ON src REBUILD +PREHOOK: type: ALTERINDEX_REBUILD +PREHOOK: Input: default@src +PREHOOK: Output: default@default__src_src_index__ +POSTHOOK: query: ALTER INDEX src_index ON src REBUILD +POSTHOOK: type: ALTERINDEX_REBUILD +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src_index__ +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + filterExpr: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num 
rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + filterExpr: ((value is not null and (key > 80)) and (key < 100)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((value is not null and (key > 80)) and (key < 100)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col5 + Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +82 82 +83 83 +83 83 +83 83 +83 83 +84 84 +84 84 +84 84 +84 84 +85 85 +86 86 +87 87 +PREHOOK: query: DROP INDEX src_index on src +PREHOOK: type: DROPINDEX +PREHOOK: Input: default@src +POSTHOOK: query: DROP INDEX src_index on src +POSTHOOK: type: DROPINDEX +POSTHOOK: Input: default@src diff --git ql/src/test/results/clientpositive/spark/infer_bucket_sort_convert_join.q.out ql/src/test/results/clientpositive/spark/infer_bucket_sort_convert_join.q.out new file mode 100644 index 0000000..c99ff55 --- /dev/null +++ ql/src/test/results/clientpositive/spark/infer_bucket_sort_convert_join.q.out @@ -0,0 +1,132 @@ +PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where joins may be auto converted to map joins. + +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table +POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer +-- and populating that information in partitions' metadata. In particular, those cases +-- where joins may be auto converted to map joins. 
+ +CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table +PREHOOK: query: -- Tests a join which is converted to a map join, the output should be neither bucketed nor sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: -- Tests a join which is converted to a map join, the output should be neither bucketed nor sorted +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@test_table +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@test_table +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +part string + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows -1 + rawDataSize -1 + totalSize 11996 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: -- This test tests the scenario when the mapper dies. So, create a conditional task for the mapjoin. +-- Tests a join which is not converted to a map join, the output should be bucketed and sorted. + +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table@part=1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: -- This test tests the scenario when the mapper dies. So, create a conditional task for the mapjoin. +-- Tests a join which is not converted to a map join, the output should be bucketed and sorted. 
+ +INSERT OVERWRITE TABLE test_table PARTITION (part = '1') +SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table@part=1 +POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@test_table +POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@test_table +# col_name data_type comment + +key string +value string + +# Partition Information +# col_name data_type comment + +part string + +# Detailed Partition Information +Partition Value: [1] +Database: default +Table: test_table +#### A masked pattern was here #### +Protect Mode: None +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE true + numFiles 1 + numRows -1 + rawDataSize -1 + totalSize 11996 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 diff --git ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out new file mode 100644 index 0000000..57c4516 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out @@ -0,0 +1,492 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN + SELECT a.key, a.value, b.key, b.value1, b.value2 + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + count(distinct(src2.key)) AS value2 + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +PREHOOK: type: QUERY +POSTHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN + SELECT a.key, a.value, b.key, b.value1, b.value2 + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + count(distinct(src2.key)) AS value2 + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT value), count(DISTINCT key) + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: 
Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column 
stats: NONE + value expressions: _col1 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.key, a.value, b.key, b.value1, b.value2 + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + count(distinct(src2.key)) AS value2 + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.key, a.value, b.key, b.value1, b.value2 + FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + count(distinct(src2.key)) AS value2 + FROM src1 src2 group by src2.key + ) b + ON (a.key = b.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +0 3 NULL NULL NULL +10 1 NULL NULL NULL +100 2 NULL NULL NULL +103 2 NULL NULL NULL +104 2 NULL NULL NULL +105 1 NULL NULL NULL +11 1 NULL NULL NULL +111 1 NULL NULL NULL +113 2 NULL NULL NULL +114 1 NULL NULL NULL +116 1 NULL NULL NULL +118 2 NULL NULL NULL +119 3 NULL NULL NULL +12 2 NULL NULL NULL +120 2 NULL NULL NULL +125 2 NULL NULL NULL +126 1 NULL NULL NULL +128 3 128 1 1 +129 2 NULL NULL NULL +131 1 NULL NULL NULL +133 1 NULL NULL NULL +134 2 NULL NULL NULL +136 1 NULL NULL NULL +137 2 NULL NULL NULL +138 4 NULL NULL NULL +143 1 NULL NULL NULL +145 1 NULL NULL NULL +146 2 146 1 1 +149 2 NULL NULL NULL +15 2 NULL NULL NULL +150 1 150 1 1 +152 2 NULL NULL NULL +153 1 NULL NULL NULL +155 1 NULL NULL NULL +156 1 NULL NULL NULL +157 1 NULL NULL NULL +158 1 NULL NULL NULL +160 1 NULL NULL NULL +162 1 NULL NULL NULL +163 1 NULL NULL NULL +164 2 NULL NULL NULL +165 2 NULL NULL NULL +166 1 NULL NULL NULL +167 3 NULL NULL NULL +168 1 NULL NULL NULL +169 4 NULL NULL NULL +17 1 NULL NULL NULL +170 1 NULL NULL NULL +172 2 NULL NULL NULL +174 2 NULL NULL NULL +175 2 NULL NULL NULL +176 2 NULL NULL NULL +177 1 NULL NULL NULL +178 1 NULL NULL NULL +179 2 NULL NULL NULL +18 2 NULL NULL NULL +180 1 NULL NULL NULL +181 1 NULL NULL NULL +183 1 NULL NULL NULL +186 1 NULL NULL NULL +187 3 NULL NULL NULL +189 1 NULL NULL NULL +19 1 NULL NULL NULL +190 1 NULL NULL NULL +191 2 NULL NULL NULL +192 1 NULL NULL NULL +193 3 NULL NULL NULL +194 1 NULL NULL NULL +195 2 NULL NULL NULL +196 1 NULL NULL NULL +197 2 NULL NULL NULL +199 3 NULL NULL NULL +2 1 NULL NULL NULL +20 1 NULL NULL NULL +200 2 NULL NULL NULL +201 1 NULL NULL NULL +202 1 NULL NULL NULL +203 2 NULL NULL NULL +205 2 NULL NULL NULL +207 2 NULL NULL NULL +208 3 NULL NULL NULL +209 2 NULL NULL NULL +213 2 213 1 1 +214 1 NULL NULL NULL +216 2 NULL NULL NULL +217 2 NULL NULL NULL +218 1 NULL NULL NULL +219 2 NULL NULL NULL +221 2 NULL NULL NULL +222 1 NULL NULL NULL +223 2 NULL NULL NULL +224 2 224 1 1 +226 1 NULL NULL NULL +228 1 NULL NULL NULL +229 2 NULL NULL NULL +230 5 NULL NULL NULL +233 2 NULL NULL NULL +235 1 NULL NULL NULL +237 2 NULL NULL NULL +238 2 238 1 1 +239 2 NULL NULL NULL +24 2 NULL NULL NULL +241 1 NULL NULL NULL +242 2 NULL NULL NULL +244 1 NULL NULL NULL +247 1 NULL NULL NULL +248 1 NULL NULL NULL +249 1 NULL NULL NULL +252 1 NULL NULL NULL +255 2 255 1 1 +256 2 NULL NULL NULL +257 1 NULL NULL NULL +258 1 NULL NULL NULL +26 2 NULL NULL NULL +260 1 NULL NULL NULL +262 
1 NULL NULL NULL +263 1 NULL NULL NULL +265 2 NULL NULL NULL +266 1 NULL NULL NULL +27 1 NULL NULL NULL +272 2 NULL NULL NULL +273 3 273 1 1 +274 1 NULL NULL NULL +275 1 NULL NULL NULL +277 4 NULL NULL NULL +278 2 278 1 1 +28 1 NULL NULL NULL +280 2 NULL NULL NULL +281 2 NULL NULL NULL +282 2 NULL NULL NULL +283 1 NULL NULL NULL +284 1 NULL NULL NULL +285 1 NULL NULL NULL +286 1 NULL NULL NULL +287 1 NULL NULL NULL +288 2 NULL NULL NULL +289 1 NULL NULL NULL +291 1 NULL NULL NULL +292 1 NULL NULL NULL +296 1 NULL NULL NULL +298 3 NULL NULL NULL +30 1 NULL NULL NULL +302 1 NULL NULL NULL +305 1 NULL NULL NULL +306 1 NULL NULL NULL +307 2 NULL NULL NULL +308 1 NULL NULL NULL +309 2 NULL NULL NULL +310 1 NULL NULL NULL +311 3 311 1 1 +315 1 NULL NULL NULL +316 3 NULL NULL NULL +317 2 NULL NULL NULL +318 3 NULL NULL NULL +321 2 NULL NULL NULL +322 2 NULL NULL NULL +323 1 NULL NULL NULL +325 2 NULL NULL NULL +327 3 NULL NULL NULL +33 1 NULL NULL NULL +331 2 NULL NULL NULL +332 1 NULL NULL NULL +333 2 NULL NULL NULL +335 1 NULL NULL NULL +336 1 NULL NULL NULL +338 1 NULL NULL NULL +339 1 NULL NULL NULL +34 1 NULL NULL NULL +341 1 NULL NULL NULL +342 2 NULL NULL NULL +344 2 NULL NULL NULL +345 1 NULL NULL NULL +348 5 NULL NULL NULL +35 3 NULL NULL NULL +351 1 NULL NULL NULL +353 2 NULL NULL NULL +356 1 NULL NULL NULL +360 1 NULL NULL NULL +362 1 NULL NULL NULL +364 1 NULL NULL NULL +365 1 NULL NULL NULL +366 1 NULL NULL NULL +367 2 NULL NULL NULL +368 1 NULL NULL NULL +369 3 369 1 1 +37 2 NULL NULL NULL +373 1 NULL NULL NULL +374 1 NULL NULL NULL +375 1 NULL NULL NULL +377 1 NULL NULL NULL +378 1 NULL NULL NULL +379 1 NULL NULL NULL +382 2 NULL NULL NULL +384 3 NULL NULL NULL +386 1 NULL NULL NULL +389 1 NULL NULL NULL +392 1 NULL NULL NULL +393 1 NULL NULL NULL +394 1 NULL NULL NULL +395 2 NULL NULL NULL +396 3 NULL NULL NULL +397 2 NULL NULL NULL +399 2 NULL NULL NULL +4 1 NULL NULL NULL +400 1 NULL NULL NULL +401 5 401 1 1 +402 1 NULL NULL NULL +403 3 NULL NULL NULL +404 2 NULL NULL NULL +406 4 406 1 1 +407 1 NULL NULL NULL +409 3 NULL NULL NULL +41 1 NULL NULL NULL +411 1 NULL NULL NULL +413 2 NULL NULL NULL +414 2 NULL NULL NULL +417 3 NULL NULL NULL +418 1 NULL NULL NULL +419 1 NULL NULL NULL +42 2 NULL NULL NULL +421 1 NULL NULL NULL +424 2 NULL NULL NULL +427 1 NULL NULL NULL +429 2 NULL NULL NULL +43 1 NULL NULL NULL +430 3 NULL NULL NULL +431 3 NULL NULL NULL +432 1 NULL NULL NULL +435 1 NULL NULL NULL +436 1 NULL NULL NULL +437 1 NULL NULL NULL +438 3 NULL NULL NULL +439 2 NULL NULL NULL +44 1 NULL NULL NULL +443 1 NULL NULL NULL +444 1 NULL NULL NULL +446 1 NULL NULL NULL +448 1 NULL NULL NULL +449 1 NULL NULL NULL +452 1 NULL NULL NULL +453 1 NULL NULL NULL +454 3 NULL NULL NULL +455 1 NULL NULL NULL +457 1 NULL NULL NULL +458 2 NULL NULL NULL +459 2 NULL NULL NULL +460 1 NULL NULL NULL +462 2 NULL NULL NULL +463 2 NULL NULL NULL +466 3 NULL NULL NULL +467 1 NULL NULL NULL +468 4 NULL NULL NULL +469 5 NULL NULL NULL +47 1 NULL NULL NULL +470 1 NULL NULL NULL +472 1 NULL NULL NULL +475 1 NULL NULL NULL +477 1 NULL NULL NULL +478 2 NULL NULL NULL +479 1 NULL NULL NULL +480 3 NULL NULL NULL +481 1 NULL NULL NULL +482 1 NULL NULL NULL +483 1 NULL NULL NULL +484 1 NULL NULL NULL +485 1 NULL NULL NULL +487 1 NULL NULL NULL +489 4 NULL NULL NULL +490 1 NULL NULL NULL +491 1 NULL NULL NULL +492 2 NULL NULL NULL +493 1 NULL NULL NULL +494 1 NULL NULL NULL +495 1 NULL NULL NULL +496 1 NULL NULL NULL +497 1 NULL NULL NULL +498 3 NULL NULL NULL +5 3 NULL NULL NULL +51 2 NULL NULL NULL +53 1 NULL 
NULL NULL +54 1 NULL NULL NULL +57 1 NULL NULL NULL +58 2 NULL NULL NULL +64 1 NULL NULL NULL +65 1 NULL NULL NULL +66 1 66 1 1 +67 2 NULL NULL NULL +69 1 NULL NULL NULL +70 3 NULL NULL NULL +72 2 NULL NULL NULL +74 1 NULL NULL NULL +76 2 NULL NULL NULL +77 1 NULL NULL NULL +78 1 NULL NULL NULL +8 1 NULL NULL NULL +80 1 NULL NULL NULL +82 1 NULL NULL NULL +83 2 NULL NULL NULL +84 2 NULL NULL NULL +85 1 NULL NULL NULL +86 1 NULL NULL NULL +87 1 NULL NULL NULL +9 1 NULL NULL NULL +90 3 NULL NULL NULL +92 1 NULL NULL NULL +95 2 NULL NULL NULL +96 1 NULL NULL NULL +97 2 NULL NULL NULL +98 2 98 1 1 +NULL NULL 7 1 diff --git ql/src/test/results/clientpositive/spark/join28.q.out ql/src/test/results/clientpositive/spark/join28.q.out new file mode 100644 index 0000000..87d6819 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join28.q.out @@ -0,0 +1,286 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j1 +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT subq.key1, z.value +FROM +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT subq.key1, z.value +FROM +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + 
alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col5 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT subq.key1, z.value +FROM +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@dest_j1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT subq.key1, z.value +FROM +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, 
type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from dest_j1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +150 val_150 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +66 val_66 +98 val_98 +98 val_98 +98 val_98 +98 val_98 diff --git ql/src/test/results/clientpositive/spark/join29.q.out ql/src/test/results/clientpositive/spark/join29.q.out new file mode 100644 index 0000000..d5383d5 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join29.q.out @@ -0,0 +1,209 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j1 +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT subq1.key, subq1.cnt, subq2.cnt +FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN + (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) +PREHOOK: type: QUERY +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT subq1.key, subq1.cnt, subq2.cnt +FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN + (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP, 1) +#### A 
masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: 
count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT subq1.key, subq1.cnt, subq2.cnt +FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN + (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest_j1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT subq1.key, subq1.cnt, subq2.cnt +FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN + (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: dest_j1.cnt1 EXPRESSION [(src1)x.null, ] +POSTHOOK: Lineage: dest_j1.cnt2 EXPRESSION [(src)y.null, ] +POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: select * from dest_j1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +128 1 3 +146 1 2 +150 1 1 +213 1 2 +224 1 2 +238 1 2 +255 1 2 +273 1 3 +278 1 2 +311 1 3 +369 1 3 +401 1 5 +406 1 4 +66 1 1 +98 1 2 diff --git ql/src/test/results/clientpositive/spark/join30.q.out ql/src/test/results/clientpositive/spark/join30.q.out new file mode 100644 index 0000000..5c16622 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join30.q.out @@ -0,0 +1,161 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key INT, cnt INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key INT, cnt INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j1 +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 
is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y 
ON (x.key = y.key) group by x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest_j1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, (src)y.null, ] +POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: select * from dest_j1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +128 3 +146 2 +150 1 +213 2 +224 2 +238 2 +255 2 +273 3 +278 2 +311 3 +369 3 +401 5 +406 4 +66 1 +98 2 diff --git ql/src/test/results/clientpositive/spark/join31.q.out ql/src/test/results/clientpositive/spark/join31.q.out new file mode 100644 index 0000000..9193df9 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join31.q.out @@ -0,0 +1,235 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, cnt INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, cnt INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j1 +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT subq1.key, count(1) as cnt +FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN + (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) +group by subq1.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT subq1.key, count(1) as cnt +FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN + (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) +group by subq1.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Map 5 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT subq1.key, count(1) as cnt +FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN + (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) +group by subq1.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest_j1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT subq1.key, count(1) as cnt +FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN + (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) +group by subq1.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, (src)y.null, ] +POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: select * from dest_j1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +128 1 +146 1 +150 1 +213 1 +224 1 +238 1 +255 1 +273 1 +278 1 +311 1 +369 1 +401 1 +406 1 +66 1 +98 1 diff --git ql/src/test/results/clientpositive/spark/join32.q.out ql/src/test/results/clientpositive/spark/join32.q.out new file mode 100644 index 0000000..5aea3f3 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join32.q.out @@ -0,0 +1,524 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j1 +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and 
z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src1 + x + TOK_TABREF + TOK_TABNAME + src + y + = + . + TOK_TABLE_OR_COL + x + key + . + TOK_TABLE_OR_COL + y + key + TOK_TABREF + TOK_TABNAME + srcpart + z + and + and + = + . + TOK_TABLE_OR_COL + x + value + . + TOK_TABLE_OR_COL + z + value + = + . + TOK_TABLE_OR_COL + z + ds + '2008-04-08' + = + . + TOK_TABLE_OR_COL + z + hr + 11 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest_j1 + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + z + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + y + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [z] + Map 3 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [y] + Map 5 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [x] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col5} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col6, _col11 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Reducer 4 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col0 (type: string), _col6 (type: string) + auto parallelism: true + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, 
string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@dest_j1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from dest_j1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +150 val_150 val_150 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +66 
val_66 val_66 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 diff --git ql/src/test/results/clientpositive/spark/join32_lessSize.q.out ql/src/test/results/clientpositive/spark/join32_lessSize.q.out new file mode 100644 index 0000000..bffd620 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join32_lessSize.q.out @@ -0,0 +1,2678 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j1 +PREHOOK: query: CREATE TABLE dest_j2(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j2 +POSTHOOK: query: CREATE TABLE dest_j2(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j2 +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src1 + x + TOK_TABREF + TOK_TABNAME + src + y + = + . + TOK_TABLE_OR_COL + x + key + . + TOK_TABLE_OR_COL + y + key + TOK_TABREF + TOK_TABNAME + srcpart + z + and + and + = + . + TOK_TABLE_OR_COL + x + value + . + TOK_TABLE_OR_COL + z + value + = + . + TOK_TABLE_OR_COL + z + ds + '2008-04-08' + = + . + TOK_TABLE_OR_COL + z + hr + 11 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest_j1 + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + z + value + TOK_SELEXPR + . 
+ TOK_TABLE_OR_COL + y + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [z] + Map 3 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [y] + Map 5 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [x] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator 
+ condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col5} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col6, _col11 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Reducer 4 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col0 (type: string), _col6 (type: string) + auto parallelism: true + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@dest_j1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN 
srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from dest_j1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +150 val_150 val_150 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +66 val_66 val_66 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 +PREHOOK: query: EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src w JOIN src1 x ON (x.value = w.value) +JOIN src y ON (x.key = y.key) +JOIN src1 z ON (x.key = z.key) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src w JOIN src1 x ON (x.value = w.value) +JOIN src y ON (x.key = y.key) +JOIN src1 z ON (x.key = z.key) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_JOIN + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src + w + TOK_TABREF + TOK_TABNAME + src1 + x + = + . + TOK_TABLE_OR_COL + x + value + . + TOK_TABLE_OR_COL + w + value + TOK_TABREF + TOK_TABNAME + src + y + = + . + TOK_TABLE_OR_COL + x + key + . + TOK_TABLE_OR_COL + y + key + TOK_TABREF + TOK_TABNAME + src1 + z + = + . + TOK_TABLE_OR_COL + x + key + . 
+ TOK_TABLE_OR_COL + z + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest_j1 + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + z + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + y + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: w + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [w] + Map 4 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + tag: 2 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked 
pattern was here #### + Partition + base file name: src1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [z] + Map 5 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated 
Path -> Alias: + /src [y] + Map 6 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value is not null and key is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [x] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {VALUE._col0} + outputColumnNames: _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Reducer 3 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + 2 {VALUE._col0} + outputColumnNames: _col5, _col11, _col16 + Statistics: Num rows: 605 Data size: 6426 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: string), _col16 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 605 Data size: 6426 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 605 Data size: 6426 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src w JOIN src1 x ON (x.value = w.value) +JOIN src y ON (x.key = y.key) +JOIN src1 z ON (x.key = z.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest_j1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src w JOIN src1 x ON (x.value = w.value) +JOIN src y ON (x.key = y.key) +JOIN src1 z ON (x.key = z.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)z.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from dest_j1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +150 val_150 val_150 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 
val_273 val_273 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +66 val_66 val_66 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 +PREHOOK: query: EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, z.value, res.value +FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res +JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, z.value, res.value +FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res +JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src1 + x + TOK_TABREF + TOK_TABNAME + src + y + = + . + TOK_TABLE_OR_COL + x + key + . + TOK_TABLE_OR_COL + y + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x + value + res + TOK_TABREF + TOK_TABNAME + srcpart + z + and + and + = + . + TOK_TABLE_OR_COL + res + value + . + TOK_TABLE_OR_COL + z + value + = + . + TOK_TABLE_OR_COL + z + ds + '2008-04-08' + = + . + TOK_TABLE_OR_COL + z + hr + 11 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest_j2 + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + res + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + z + value + TOK_SELEXPR + . 
+ TOK_TABLE_OR_COL + res + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [y] + Map 4 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + 
bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [x] + Map 5 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [z] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 
+ condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col0 (type: string) + auto parallelism: true + Reducer 3 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2 + serialization.ddl struct dest_j2 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2 + serialization.ddl struct dest_j2 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, z.value, res.value +FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res +JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@dest_j2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type 
counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, z.value, res.value +FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res +JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@dest_j2 +POSTHOOK: Lineage: dest_j2.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.val2 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from dest_j2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j2 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j2 +#### A masked pattern was here #### +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +150 val_150 val_150 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +66 val_66 val_66 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 +PREHOOK: query: EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, z.value, res.value +FROM (select x.key, x.value from src1 x LEFT OUTER JOIN src y ON (x.key = y.key)) res +JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, z.value, res.value +FROM (select x.key, x.value from src1 x LEFT OUTER JOIN src y ON (x.key = y.key)) res +JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_LEFTOUTERJOIN + TOK_TABREF + TOK_TABNAME + src1 + 
x + TOK_TABREF + TOK_TABNAME + src + y + = + . + TOK_TABLE_OR_COL + x + key + . + TOK_TABLE_OR_COL + y + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x + value + res + TOK_TABREF + TOK_TABNAME + srcpart + z + and + and + = + . + TOK_TABLE_OR_COL + res + value + . + TOK_TABLE_OR_COL + z + value + = + . + TOK_TABLE_OR_COL + z + ds + '2008-04-08' + = + . + TOK_TABLE_OR_COL + z + hr + 11 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest_j2 + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + res + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + z + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + res + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [y] + Map 4 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key 
(type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [x] + Map 5 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + 
partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [z] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col0 (type: string) + auto parallelism: true + Reducer 3 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2 + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct dest_j2 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2 + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct dest_j2 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, z.value, res.value +FROM (select x.key, x.value from src1 x LEFT OUTER JOIN src y ON (x.key = y.key)) res +JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@dest_j2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, z.value, res.value +FROM (select x.key, x.value from src1 x LEFT OUTER JOIN src y ON (x.key = y.key)) res +JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@dest_j2 +POSTHOOK: Lineage: dest_j2.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.val2 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from dest_j2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j2 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j2 +#### A masked pattern was here #### + val_165 val_165 + val_165 val_165 + val_193 val_193 + val_193 val_193 + val_193 val_193 + val_265 val_265 + val_265 val_265 + val_27 val_27 + val_409 val_409 + val_409 val_409 + val_409 val_409 + val_484 val_484 +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +150 val_150 val_150 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 
val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +66 val_66 val_66 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, x.value, res.value +FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res +JOIN srcpart x ON (res.value = x.value and x.ds='2008-04-08' and x.hr=11) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, x.value, res.value +FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res +JOIN srcpart x ON (res.value = x.value and x.ds='2008-04-08' and x.hr=11) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 3 + Reduce Operator Tree: 
+ Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, x.value, res.value +FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res +JOIN srcpart x ON (res.value = x.value and x.ds='2008-04-08' and x.hr=11) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@dest_j2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, x.value, res.value +FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res +JOIN srcpart x ON (res.value = x.value and x.ds='2008-04-08' and x.hr=11) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@dest_j2 +POSTHOOK: Lineage: dest_j2.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.val2 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.value SIMPLE [(srcpart)x.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from dest_j2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j2 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j2 +#### A masked pattern was here #### +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +150 val_150 val_150 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 
val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +66 val_66 val_66 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, y.value, res.value +FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res +JOIN srcpart y ON (res.value = y.value and y.ds='2008-04-08' and y.hr=11) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, y.value, res.value +FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res +JOIN srcpart y ON (res.value = y.value and y.ds='2008-04-08' and y.hr=11) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 
250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, y.value, res.value +FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res +JOIN srcpart y ON (res.value = y.value and y.ds='2008-04-08' and y.hr=11) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@dest_j2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, y.value, res.value +FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res +JOIN srcpart y ON (res.value = y.value and y.ds='2008-04-08' and y.hr=11) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@dest_j2 +POSTHOOK: Lineage: dest_j2.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.val2 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.value SIMPLE [(srcpart)y.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from dest_j2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j2 +#### A masked pattern was here #### 
+POSTHOOK: query: select * from dest_j2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j2 +#### A masked pattern was here #### +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +150 val_150 val_150 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +66 val_66 val_66 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 diff --git ql/src/test/results/clientpositive/spark/join33.q.out ql/src/test/results/clientpositive/spark/join33.q.out new file mode 100644 index 0000000..5aea3f3 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join33.q.out @@ -0,0 +1,524 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j1 +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src1 + x + TOK_TABREF + TOK_TABNAME + src + y + = + . + TOK_TABLE_OR_COL + x + key + . + TOK_TABLE_OR_COL + y + key + TOK_TABREF + TOK_TABNAME + srcpart + z + and + and + = + . + TOK_TABLE_OR_COL + x + value + . 
+ TOK_TABLE_OR_COL + z + value + = + . + TOK_TABLE_OR_COL + z + ds + '2008-04-08' + = + . + TOK_TABLE_OR_COL + z + hr + 11 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest_j1 + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + z + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + y + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [z] + Map 3 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [y] + Map 5 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [x] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col5} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col6, _col11 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Reducer 4 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col0 (type: string), _col6 (type: string) + auto parallelism: true + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Output: 
default@dest_j1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from dest_j1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +150 val_150 val_150 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +66 val_66 val_66 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 diff --git ql/src/test/results/clientpositive/spark/join34.q.out ql/src/test/results/clientpositive/spark/join34.q.out new file mode 100644 index 0000000..533c285 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join34.q.out @@ -0,0 +1,498 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: 
type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j1 +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, x.value, subq1.value +FROM +( SELECT x.key as key, x.value as value from src x where x.key < 20 + UNION ALL + SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 +) subq1 +JOIN src1 x ON (x.key = subq1.key) +PREHOOK: type: QUERY +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, x.value, subq1.value +FROM +( SELECT x.key as key, x.value as value from src x where x.key < 20 + UNION ALL + SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 +) subq1 +JOIN src1 x ON (x.key = subq1.key) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_SUBQUERY + TOK_UNION + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + x + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x + key + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x + value + value + TOK_WHERE + < + . + TOK_TABLE_OR_COL + x + key + 20 + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + x1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x1 + key + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x1 + value + value + TOK_WHERE + > + . + TOK_TABLE_OR_COL + x1 + key + 100 + subq1 + TOK_TABREF + TOK_TABNAME + src1 + x + = + . + TOK_TABLE_OR_COL + x + key + . + TOK_TABLE_OR_COL + subq1 + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest_j1 + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x + value + TOK_SELEXPR + . 
+ TOK_TABLE_OR_COL + subq1 + value + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Union 2 (GROUP PARTITION-LEVEL SORT, 1) + Union 2 <- Map 1 (NONE, 0), Map 4 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key < 20) and key is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [x] + Map 4 + Map Operator Tree: + TableScan + alias: x1 + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 100) and key is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [x1] + Map 5 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [x] + Reducer 3 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num 
rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Union 2 + Vertex: Union 2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, x.value, subq1.value +FROM +( SELECT x.key as key, x.value as value from src x where x.key < 20 + UNION ALL + SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 +) subq1 +JOIN src1 x ON (x.key = subq1.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest_j1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, x.value, subq1.value +FROM +( SELECT x.key as key, x.value as value from src x where x.key < 20 + UNION ALL + SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 +) subq1 +JOIN src1 x ON (x.key = subq1.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)x.FieldSchema(name:value, type:string, comment:default), (src)x1.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from dest_j1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +128 val_128 +128 val_128 +128 val_128 +146 val_146 val_146 +146 val_146 val_146 
+150 val_150 val_150 +213 val_213 val_213 +213 val_213 val_213 +224 val_224 +224 val_224 +238 val_238 val_238 +238 val_238 val_238 +255 val_255 val_255 +255 val_255 val_255 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +278 val_278 val_278 +278 val_278 val_278 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +369 val_369 +369 val_369 +369 val_369 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 diff --git ql/src/test/results/clientpositive/spark/join35.q.out ql/src/test/results/clientpositive/spark/join35.q.out new file mode 100644 index 0000000..1750aec --- /dev/null +++ ql/src/test/results/clientpositive/spark/join35.q.out @@ -0,0 +1,543 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j1 +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, x.value, subq1.cnt +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key +) subq1 +JOIN src1 x ON (x.key = subq1.key) +PREHOOK: type: QUERY +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, x.value, subq1.cnt +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key +) subq1 +JOIN src1 x ON (x.key = subq1.key) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_SUBQUERY + TOK_UNION + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + x + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x + key + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + cnt + TOK_WHERE + < + . + TOK_TABLE_OR_COL + x + key + 20 + TOK_GROUPBY + . + TOK_TABLE_OR_COL + x + key + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + x1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x1 + key + key + TOK_SELEXPR + TOK_FUNCTION + count + 1 + cnt + TOK_WHERE + > + . + TOK_TABLE_OR_COL + x1 + key + 100 + TOK_GROUPBY + . + TOK_TABLE_OR_COL + x1 + key + subq1 + TOK_TABREF + TOK_TABNAME + src1 + x + = + . + TOK_TABLE_OR_COL + x + key + . + TOK_TABLE_OR_COL + subq1 + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest_j1 + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x + value + TOK_SELEXPR + . 
+ TOK_TABLE_OR_COL + subq1 + cnt + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 1), Union 3 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (GROUP, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key < 20) and key is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [x] + Map 5 + Map Operator Tree: + TableScan + alias: x1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 100) and key is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By 
Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [x1] + Map 7 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + 
properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [x] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + value expressions: _col1 (type: bigint) + auto parallelism: true + Reducer 4 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 90 Data size: 957 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 90 Data size: 957 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 90 Data size: 957 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:int +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, i32 val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Reducer 6 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + value expressions: _col1 (type: bigint) + auto parallelism: true + Union 3 + Vertex: Union 3 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:int +#### A masked pattern was here #### + 
name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, i32 val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, x.value, subq1.cnt +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key +) subq1 +JOIN src1 x ON (x.key = subq1.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest_j1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, x.value, subq1.cnt +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key + UNION ALL + SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key +) subq1 +JOIN src1 x ON (x.key = subq1.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)x.null, (src)x1.null, ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from dest_j1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +128 3 +146 val_146 2 +150 val_150 1 +213 val_213 2 +224 2 +238 val_238 2 +255 val_255 2 +273 val_273 3 +278 val_278 2 +311 val_311 3 +369 3 +401 val_401 5 +406 val_406 4 diff --git ql/src/test/results/clientpositive/spark/join36.q.out ql/src/test/results/clientpositive/spark/join36.q.out new file mode 100644 index 0000000..3c7e5e4 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join36.q.out @@ -0,0 +1,479 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE tmp1(key INT, cnt INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE tmp1(key INT, cnt INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp1 +PREHOOK: query: CREATE TABLE tmp2(key INT, cnt INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp2 +POSTHOOK: query: CREATE TABLE tmp2(key INT, cnt INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp2 +PREHOOK: query: CREATE TABLE dest_j1(key INT, value INT, val2 INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: CREATE TABLE dest_j1(key INT, value INT, val2 INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j1 +PREHOOK: query: INSERT OVERWRITE TABLE tmp1 +SELECT key, count(1) from src group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tmp1 
+[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE tmp1 +SELECT key, count(1) from src group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tmp1 +POSTHOOK: Lineage: tmp1.cnt EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: tmp1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE tmp2 +SELECT key, count(1) from src group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tmp2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE tmp2 +SELECT key, count(1) from src group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tmp2 +POSTHOOK: Lineage: tmp2.cnt EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: tmp2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt +FROM tmp1 x JOIN tmp2 y ON (x.key = y.key) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt +FROM tmp1 x JOIN tmp2 y ON (x.key = y.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 223 Data size: 1791 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 112 Data size: 899 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 112 Data size: 899 Basic stats: COMPLETE Column stats: NONE + value expressions: cnt (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 223 Data size: 1791 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 112 Data size: 899 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 112 Data size: 899 Basic stats: COMPLETE Column stats: NONE + value expressions: cnt (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 123 Data size: 988 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col6 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 123 Data size: 988 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 123 Data size: 988 Basic stats: 
COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt +FROM tmp1 x JOIN tmp2 y ON (x.key = y.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp1 +PREHOOK: Input: default@tmp2 +PREHOOK: Output: default@dest_j1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt +FROM tmp1 x JOIN tmp2 y ON (x.key = y.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp1 +POSTHOOK: Input: default@tmp2 +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: dest_j1.key SIMPLE [(tmp1)x.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(tmp2)y.FieldSchema(name:cnt, type:int, comment:null), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(tmp1)x.FieldSchema(name:cnt, type:int, comment:null), ] +PREHOOK: query: select * from dest_j1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +0 3 3 +10 1 1 +100 2 2 +103 2 2 +104 2 2 +105 1 1 +11 1 1 +111 1 1 +113 2 2 +114 1 1 +116 1 1 +118 2 2 +119 3 3 +12 2 2 +120 2 2 +125 2 2 +126 1 1 +128 3 3 +129 2 2 +131 1 1 +133 1 1 +134 2 2 +136 1 1 +137 2 2 +138 4 4 +143 1 1 +145 1 1 +146 2 2 +149 2 2 +15 2 2 +150 1 1 +152 2 2 +153 1 1 +155 1 1 +156 1 1 +157 1 1 +158 1 1 +160 1 1 +162 1 1 +163 1 1 +164 2 2 +165 2 2 +166 1 1 +167 3 3 +168 1 1 +169 4 4 +17 1 1 +170 1 1 +172 2 2 +174 2 2 +175 2 2 +176 2 2 +177 1 1 +178 1 1 +179 2 2 +18 2 2 +180 1 1 +181 1 1 +183 1 1 +186 1 1 +187 3 3 +189 1 1 +19 1 1 +190 1 1 +191 2 2 +192 1 1 +193 3 3 +194 1 1 +195 2 2 +196 1 1 +197 2 2 +199 3 3 +2 1 1 +20 1 1 +200 2 2 +201 1 1 +202 1 1 +203 2 2 +205 2 2 +207 2 2 +208 3 3 +209 2 2 +213 2 2 +214 1 1 +216 2 2 +217 2 2 +218 1 1 +219 2 2 +221 2 2 +222 1 1 +223 2 2 +224 2 2 +226 1 1 +228 1 1 +229 2 2 +230 5 5 +233 2 2 +235 1 1 +237 2 2 +238 2 2 +239 2 2 +24 2 2 +241 1 1 +242 2 2 +244 1 1 +247 1 1 +248 1 1 +249 1 1 +252 1 1 +255 2 2 +256 2 2 +257 1 1 +258 1 1 +26 2 2 +260 1 1 +262 1 1 +263 1 1 +265 2 2 +266 1 1 +27 1 1 +272 2 2 +273 3 3 +274 1 1 +275 1 1 +277 4 4 +278 2 2 +28 1 1 +280 2 2 +281 2 2 +282 2 2 +283 1 1 +284 1 1 +285 1 1 +286 1 1 +287 1 1 +288 2 2 +289 1 1 +291 1 1 +292 1 1 +296 1 1 +298 3 3 +30 1 1 +302 1 1 +305 1 1 +306 1 1 +307 2 2 +308 1 1 +309 2 2 +310 1 1 +311 3 3 +315 1 1 +316 3 3 +317 2 2 +318 3 3 +321 2 2 +322 2 2 +323 1 1 +325 2 2 +327 3 3 +33 1 1 +331 2 2 +332 1 1 +333 2 2 +335 1 1 +336 1 1 +338 1 1 +339 1 1 +34 1 1 +341 1 1 +342 2 2 +344 2 2 +345 1 1 +348 5 5 +35 3 3 +351 1 1 +353 2 2 +356 1 1 +360 1 1 +362 1 1 +364 1 1 +365 1 1 +366 1 1 +367 2 2 +368 1 1 +369 3 3 +37 2 2 +373 1 1 +374 1 1 +375 1 1 
+377 1 1 +378 1 1 +379 1 1 +382 2 2 +384 3 3 +386 1 1 +389 1 1 +392 1 1 +393 1 1 +394 1 1 +395 2 2 +396 3 3 +397 2 2 +399 2 2 +4 1 1 +400 1 1 +401 5 5 +402 1 1 +403 3 3 +404 2 2 +406 4 4 +407 1 1 +409 3 3 +41 1 1 +411 1 1 +413 2 2 +414 2 2 +417 3 3 +418 1 1 +419 1 1 +42 2 2 +421 1 1 +424 2 2 +427 1 1 +429 2 2 +43 1 1 +430 3 3 +431 3 3 +432 1 1 +435 1 1 +436 1 1 +437 1 1 +438 3 3 +439 2 2 +44 1 1 +443 1 1 +444 1 1 +446 1 1 +448 1 1 +449 1 1 +452 1 1 +453 1 1 +454 3 3 +455 1 1 +457 1 1 +458 2 2 +459 2 2 +460 1 1 +462 2 2 +463 2 2 +466 3 3 +467 1 1 +468 4 4 +469 5 5 +47 1 1 +470 1 1 +472 1 1 +475 1 1 +477 1 1 +478 2 2 +479 1 1 +480 3 3 +481 1 1 +482 1 1 +483 1 1 +484 1 1 +485 1 1 +487 1 1 +489 4 4 +490 1 1 +491 1 1 +492 2 2 +493 1 1 +494 1 1 +495 1 1 +496 1 1 +497 1 1 +498 3 3 +5 3 3 +51 2 2 +53 1 1 +54 1 1 +57 1 1 +58 2 2 +64 1 1 +65 1 1 +66 1 1 +67 2 2 +69 1 1 +70 3 3 +72 2 2 +74 1 1 +76 2 2 +77 1 1 +78 1 1 +8 1 1 +80 1 1 +82 1 1 +83 2 2 +84 2 2 +85 1 1 +86 1 1 +87 1 1 +9 1 1 +90 3 3 +92 1 1 +95 2 2 +96 1 1 +97 2 2 +98 2 2 diff --git ql/src/test/results/clientpositive/spark/join37.q.out ql/src/test/results/clientpositive/spark/join37.q.out new file mode 100644 index 0000000..1e5697b --- /dev/null +++ ql/src/test/results/clientpositive/spark/join37.q.out @@ -0,0 +1,165 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key INT, value STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key INT, value STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j1 +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + 
condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest_j1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from dest_j1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +128 val_128 +128 val_128 +128 val_128 +146 val_146 val_146 +146 val_146 val_146 +150 val_150 val_150 +213 val_213 val_213 +213 val_213 val_213 +224 val_224 +224 val_224 +238 val_238 val_238 +238 val_238 val_238 +255 val_255 val_255 +255 val_255 val_255 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +278 val_278 val_278 +278 val_278 val_278 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +369 val_369 +369 val_369 +369 val_369 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +66 val_66 val_66 +98 val_98 val_98 +98 val_98 val_98 diff --git ql/src/test/results/clientpositive/spark/join38.q.out ql/src/test/results/clientpositive/spark/join38.q.out new file mode 100644 index 0000000..cef8a84 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join38.q.out @@ -0,0 +1,158 @@ +PREHOOK: query: create table tmp(col0 string, col1 string,col2 string,col3 string,col4 string,col5 string,col6 
string,col7 string,col8 string,col9 string,col10 string,col11 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp +POSTHOOK: query: create table tmp(col0 string, col1 string,col2 string,col3 string,col4 string,col5 string,col6 string,col7 string,col8 string,col9 string,col10 string,col11 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp +PREHOOK: query: insert overwrite table tmp select key, cast(key + 1 as int), key +2, key+3, key+4, cast(key+5 as int), key+6, key+7, key+8, key+9, key+10, cast(key+11 as int) from src where key = 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tmp +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tmp select key, cast(key + 1 as int), key +2, key+3, key+4, cast(key+5 as int), key+6, key+7, key+8, key+9, key+10, cast(key+11 as int) from src where key = 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tmp +POSTHOOK: Lineage: tmp.col0 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp.col1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp.col10 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp.col11 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp.col2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp.col3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp.col4 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp.col5 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp.col6 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp.col7 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp.col8 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tmp.col9 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: select * from tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp +#### A masked pattern was here #### +POSTHOOK: query: select * from tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp +#### A masked pattern was here #### +100 101 102.0 103.0 104.0 105 106.0 107.0 108.0 109.0 110.0 111 +100 101 102.0 103.0 104.0 105 106.0 107.0 108.0 109.0 110.0 111 +PREHOOK: query: explain +FROM src a JOIN tmp b ON (a.key = b.col11) +SELECT /*+ MAPJOIN(a) */ a.value, b.col5, count(1) as count +where b.col11 = 111 +group by a.value, b.col5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM src a JOIN tmp b ON (a.key = b.col11) +SELECT /*+ MAPJOIN(a) */ a.value, b.col5, count(1) as count +where b.col11 = 111 +group by a.value, b.col5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator 
Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (col11 is not null and (col11 = 111)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: col5 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key = 111)) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col5} + outputColumnNames: _col1, _col10 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col10 (type: string) + outputColumnNames: _col1, _col10 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col1 (type: string), _col10 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM src a JOIN tmp b ON (a.key = b.col11) +SELECT /*+ MAPJOIN(a) */ a.value, b.col5, count(1) as count +where b.col11 = 111 +group by a.value, b.col5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@tmp +#### A masked pattern was here #### +POSTHOOK: query: FROM src a JOIN tmp b ON (a.key = b.col11) +SELECT /*+ MAPJOIN(a) */ a.value, b.col5, count(1) as count +where b.col11 = 111 +group by a.value, b.col5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@tmp +#### A masked pattern was here #### +val_111 105 2 diff --git 
ql/src/test/results/clientpositive/spark/join39.q.out ql/src/test/results/clientpositive/spark/join39.q.out new file mode 100644 index 0000000..892a722 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join39.q.out @@ -0,0 +1,694 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, value STRING, key1 string, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, value STRING, key1 string, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j1 +PREHOOK: query: explain +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value +FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value +FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key <= 100) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.dest_j1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value +FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_j1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value +FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: dest_j1.key SIMPLE [(src)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.key1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from dest_j1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +10 val_10 10 val_10 +100 val_100 100 val_100 +100 val_100 100 val_100 +100 val_100 100 val_100 +100 val_100 100 val_100 +103 val_103 NULL NULL +103 val_103 NULL NULL +104 val_104 NULL NULL +104 val_104 NULL NULL +105 val_105 NULL NULL +11 val_11 11 val_11 +111 val_111 NULL NULL +113 val_113 NULL NULL +113 val_113 NULL NULL +114 val_114 NULL NULL +116 val_116 NULL NULL +118 val_118 NULL NULL +118 val_118 NULL NULL +119 val_119 NULL NULL +119 val_119 NULL NULL +119 val_119 NULL NULL +12 val_12 12 val_12 +12 val_12 12 val_12 +12 val_12 12 val_12 +12 val_12 12 val_12 +120 val_120 NULL NULL +120 val_120 NULL NULL +125 val_125 NULL NULL +125 val_125 NULL NULL +126 val_126 NULL NULL +128 val_128 NULL NULL +128 val_128 NULL NULL +128 val_128 NULL NULL +129 val_129 NULL NULL +129 val_129 NULL NULL +131 val_131 NULL NULL +133 val_133 NULL NULL +134 val_134 NULL NULL +134 val_134 NULL NULL +136 val_136 NULL NULL +137 val_137 NULL NULL +137 val_137 NULL NULL +138 val_138 NULL NULL +138 val_138 NULL NULL +138 val_138 NULL NULL +138 val_138 NULL NULL +143 val_143 NULL NULL +145 val_145 NULL NULL +146 val_146 NULL NULL +146 val_146 NULL NULL +149 val_149 NULL NULL +149 val_149 NULL NULL +15 val_15 15 val_15 +15 val_15 15 val_15 +15 val_15 15 val_15 +15 val_15 15 val_15 +150 val_150 NULL NULL +152 val_152 NULL NULL +152 val_152 NULL NULL +153 val_153 NULL NULL +155 val_155 NULL NULL +156 val_156 NULL NULL +157 val_157 NULL NULL +158 val_158 NULL NULL +160 val_160 NULL NULL +162 val_162 NULL NULL +163 val_163 NULL NULL +164 val_164 NULL NULL +164 val_164 NULL NULL +165 val_165 NULL NULL 
+165 val_165 NULL NULL +166 val_166 NULL NULL +167 val_167 NULL NULL +167 val_167 NULL NULL +167 val_167 NULL NULL +168 val_168 NULL NULL +169 val_169 NULL NULL +169 val_169 NULL NULL +169 val_169 NULL NULL +169 val_169 NULL NULL +17 val_17 17 val_17 +170 val_170 NULL NULL +172 val_172 NULL NULL +172 val_172 NULL NULL +174 val_174 NULL NULL +174 val_174 NULL NULL +175 val_175 NULL NULL +175 val_175 NULL NULL +176 val_176 NULL NULL +176 val_176 NULL NULL +177 val_177 NULL NULL +178 val_178 NULL NULL +179 val_179 NULL NULL +179 val_179 NULL NULL +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +180 val_180 NULL NULL +181 val_181 NULL NULL +183 val_183 NULL NULL +186 val_186 NULL NULL +187 val_187 NULL NULL +187 val_187 NULL NULL +187 val_187 NULL NULL +189 val_189 NULL NULL +19 val_19 19 val_19 +190 val_190 NULL NULL +191 val_191 NULL NULL +191 val_191 NULL NULL +192 val_192 NULL NULL +193 val_193 NULL NULL +193 val_193 NULL NULL +193 val_193 NULL NULL +194 val_194 NULL NULL +195 val_195 NULL NULL +195 val_195 NULL NULL +196 val_196 NULL NULL +197 val_197 NULL NULL +197 val_197 NULL NULL +199 val_199 NULL NULL +199 val_199 NULL NULL +199 val_199 NULL NULL +2 val_2 2 val_2 +20 val_20 20 val_20 +200 val_200 NULL NULL +200 val_200 NULL NULL +201 val_201 NULL NULL +202 val_202 NULL NULL +203 val_203 NULL NULL +203 val_203 NULL NULL +205 val_205 NULL NULL +205 val_205 NULL NULL +207 val_207 NULL NULL +207 val_207 NULL NULL +208 val_208 NULL NULL +208 val_208 NULL NULL +208 val_208 NULL NULL +209 val_209 NULL NULL +209 val_209 NULL NULL +213 val_213 NULL NULL +213 val_213 NULL NULL +214 val_214 NULL NULL +216 val_216 NULL NULL +216 val_216 NULL NULL +217 val_217 NULL NULL +217 val_217 NULL NULL +218 val_218 NULL NULL +219 val_219 NULL NULL +219 val_219 NULL NULL +221 val_221 NULL NULL +221 val_221 NULL NULL +222 val_222 NULL NULL +223 val_223 NULL NULL +223 val_223 NULL NULL +224 val_224 NULL NULL +224 val_224 NULL NULL +226 val_226 NULL NULL +228 val_228 NULL NULL +229 val_229 NULL NULL +229 val_229 NULL NULL +230 val_230 NULL NULL +230 val_230 NULL NULL +230 val_230 NULL NULL +230 val_230 NULL NULL +230 val_230 NULL NULL +233 val_233 NULL NULL +233 val_233 NULL NULL +235 val_235 NULL NULL +237 val_237 NULL NULL +237 val_237 NULL NULL +238 val_238 NULL NULL +238 val_238 NULL NULL +239 val_239 NULL NULL +239 val_239 NULL NULL +24 val_24 24 val_24 +24 val_24 24 val_24 +24 val_24 24 val_24 +24 val_24 24 val_24 +241 val_241 NULL NULL +242 val_242 NULL NULL +242 val_242 NULL NULL +244 val_244 NULL NULL +247 val_247 NULL NULL +248 val_248 NULL NULL +249 val_249 NULL NULL +252 val_252 NULL NULL +255 val_255 NULL NULL +255 val_255 NULL NULL +256 val_256 NULL NULL +256 val_256 NULL NULL +257 val_257 NULL NULL +258 val_258 NULL NULL +26 val_26 26 val_26 +26 val_26 26 val_26 +26 val_26 26 val_26 +26 val_26 26 val_26 +260 val_260 NULL NULL +262 val_262 NULL NULL +263 val_263 NULL NULL +265 val_265 NULL NULL +265 val_265 NULL NULL +266 val_266 NULL NULL +27 val_27 27 val_27 +272 val_272 NULL NULL +272 val_272 NULL NULL +273 val_273 NULL NULL +273 val_273 NULL NULL +273 val_273 NULL NULL +274 val_274 NULL NULL +275 val_275 NULL NULL +277 val_277 NULL NULL +277 val_277 NULL NULL +277 val_277 NULL NULL +277 val_277 NULL NULL +278 val_278 NULL NULL +278 val_278 NULL NULL +28 val_28 28 val_28 +280 val_280 NULL NULL +280 val_280 NULL NULL +281 val_281 NULL NULL +281 val_281 NULL NULL +282 val_282 NULL NULL +282 val_282 NULL NULL +283 val_283 NULL NULL +284 val_284 NULL NULL +285 
val_285 NULL NULL +286 val_286 NULL NULL +287 val_287 NULL NULL +288 val_288 NULL NULL +288 val_288 NULL NULL +289 val_289 NULL NULL +291 val_291 NULL NULL +292 val_292 NULL NULL +296 val_296 NULL NULL +298 val_298 NULL NULL +298 val_298 NULL NULL +298 val_298 NULL NULL +30 val_30 30 val_30 +302 val_302 NULL NULL +305 val_305 NULL NULL +306 val_306 NULL NULL +307 val_307 NULL NULL +307 val_307 NULL NULL +308 val_308 NULL NULL +309 val_309 NULL NULL +309 val_309 NULL NULL +310 val_310 NULL NULL +311 val_311 NULL NULL +311 val_311 NULL NULL +311 val_311 NULL NULL +315 val_315 NULL NULL +316 val_316 NULL NULL +316 val_316 NULL NULL +316 val_316 NULL NULL +317 val_317 NULL NULL +317 val_317 NULL NULL +318 val_318 NULL NULL +318 val_318 NULL NULL +318 val_318 NULL NULL +321 val_321 NULL NULL +321 val_321 NULL NULL +322 val_322 NULL NULL +322 val_322 NULL NULL +323 val_323 NULL NULL +325 val_325 NULL NULL +325 val_325 NULL NULL +327 val_327 NULL NULL +327 val_327 NULL NULL +327 val_327 NULL NULL +33 val_33 33 val_33 +331 val_331 NULL NULL +331 val_331 NULL NULL +332 val_332 NULL NULL +333 val_333 NULL NULL +333 val_333 NULL NULL +335 val_335 NULL NULL +336 val_336 NULL NULL +338 val_338 NULL NULL +339 val_339 NULL NULL +34 val_34 34 val_34 +341 val_341 NULL NULL +342 val_342 NULL NULL +342 val_342 NULL NULL +344 val_344 NULL NULL +344 val_344 NULL NULL +345 val_345 NULL NULL +348 val_348 NULL NULL +348 val_348 NULL NULL +348 val_348 NULL NULL +348 val_348 NULL NULL +348 val_348 NULL NULL +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +351 val_351 NULL NULL +353 val_353 NULL NULL +353 val_353 NULL NULL +356 val_356 NULL NULL +360 val_360 NULL NULL +362 val_362 NULL NULL +364 val_364 NULL NULL +365 val_365 NULL NULL +366 val_366 NULL NULL +367 val_367 NULL NULL +367 val_367 NULL NULL +368 val_368 NULL NULL +369 val_369 NULL NULL +369 val_369 NULL NULL +369 val_369 NULL NULL +37 val_37 37 val_37 +37 val_37 37 val_37 +37 val_37 37 val_37 +37 val_37 37 val_37 +373 val_373 NULL NULL +374 val_374 NULL NULL +375 val_375 NULL NULL +377 val_377 NULL NULL +378 val_378 NULL NULL +379 val_379 NULL NULL +382 val_382 NULL NULL +382 val_382 NULL NULL +384 val_384 NULL NULL +384 val_384 NULL NULL +384 val_384 NULL NULL +386 val_386 NULL NULL +389 val_389 NULL NULL +392 val_392 NULL NULL +393 val_393 NULL NULL +394 val_394 NULL NULL +395 val_395 NULL NULL +395 val_395 NULL NULL +396 val_396 NULL NULL +396 val_396 NULL NULL +396 val_396 NULL NULL +397 val_397 NULL NULL +397 val_397 NULL NULL +399 val_399 NULL NULL +399 val_399 NULL NULL +4 val_4 4 val_4 +400 val_400 NULL NULL +401 val_401 NULL NULL +401 val_401 NULL NULL +401 val_401 NULL NULL +401 val_401 NULL NULL +401 val_401 NULL NULL +402 val_402 NULL NULL +403 val_403 NULL NULL +403 val_403 NULL NULL +403 val_403 NULL NULL +404 val_404 NULL NULL +404 val_404 NULL NULL +406 val_406 NULL NULL +406 val_406 NULL NULL +406 val_406 NULL NULL +406 val_406 NULL NULL +407 val_407 NULL NULL +409 val_409 NULL NULL +409 val_409 NULL NULL +409 val_409 NULL NULL +41 val_41 41 val_41 +411 val_411 NULL NULL +413 val_413 NULL NULL +413 val_413 NULL NULL +414 val_414 NULL NULL +414 val_414 NULL NULL +417 val_417 NULL NULL +417 val_417 NULL NULL +417 val_417 NULL NULL +418 val_418 NULL NULL +419 val_419 NULL NULL +42 val_42 42 val_42 +42 val_42 42 val_42 +42 val_42 42 val_42 +42 val_42 42 val_42 +421 val_421 NULL NULL +424 val_424 NULL 
NULL +424 val_424 NULL NULL +427 val_427 NULL NULL +429 val_429 NULL NULL +429 val_429 NULL NULL +43 val_43 43 val_43 +430 val_430 NULL NULL +430 val_430 NULL NULL +430 val_430 NULL NULL +431 val_431 NULL NULL +431 val_431 NULL NULL +431 val_431 NULL NULL +432 val_432 NULL NULL +435 val_435 NULL NULL +436 val_436 NULL NULL +437 val_437 NULL NULL +438 val_438 NULL NULL +438 val_438 NULL NULL +438 val_438 NULL NULL +439 val_439 NULL NULL +439 val_439 NULL NULL +44 val_44 44 val_44 +443 val_443 NULL NULL +444 val_444 NULL NULL +446 val_446 NULL NULL +448 val_448 NULL NULL +449 val_449 NULL NULL +452 val_452 NULL NULL +453 val_453 NULL NULL +454 val_454 NULL NULL +454 val_454 NULL NULL +454 val_454 NULL NULL +455 val_455 NULL NULL +457 val_457 NULL NULL +458 val_458 NULL NULL +458 val_458 NULL NULL +459 val_459 NULL NULL +459 val_459 NULL NULL +460 val_460 NULL NULL +462 val_462 NULL NULL +462 val_462 NULL NULL +463 val_463 NULL NULL +463 val_463 NULL NULL +466 val_466 NULL NULL +466 val_466 NULL NULL +466 val_466 NULL NULL +467 val_467 NULL NULL +468 val_468 NULL NULL +468 val_468 NULL NULL +468 val_468 NULL NULL +468 val_468 NULL NULL +469 val_469 NULL NULL +469 val_469 NULL NULL +469 val_469 NULL NULL +469 val_469 NULL NULL +469 val_469 NULL NULL +47 val_47 47 val_47 +470 val_470 NULL NULL +472 val_472 NULL NULL +475 val_475 NULL NULL +477 val_477 NULL NULL +478 val_478 NULL NULL +478 val_478 NULL NULL +479 val_479 NULL NULL +480 val_480 NULL NULL +480 val_480 NULL NULL +480 val_480 NULL NULL +481 val_481 NULL NULL +482 val_482 NULL NULL +483 val_483 NULL NULL +484 val_484 NULL NULL +485 val_485 NULL NULL +487 val_487 NULL NULL +489 val_489 NULL NULL +489 val_489 NULL NULL +489 val_489 NULL NULL +489 val_489 NULL NULL +490 val_490 NULL NULL +491 val_491 NULL NULL +492 val_492 NULL NULL +492 val_492 NULL NULL +493 val_493 NULL NULL +494 val_494 NULL NULL +495 val_495 NULL NULL +496 val_496 NULL NULL +497 val_497 NULL NULL +498 val_498 NULL NULL +498 val_498 NULL NULL +498 val_498 NULL NULL +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +51 val_51 51 val_51 +51 val_51 51 val_51 +51 val_51 51 val_51 +51 val_51 51 val_51 +53 val_53 53 val_53 +54 val_54 54 val_54 +57 val_57 57 val_57 +58 val_58 58 val_58 +58 val_58 58 val_58 +58 val_58 58 val_58 +58 val_58 58 val_58 +64 val_64 64 val_64 +65 val_65 65 val_65 +66 val_66 66 val_66 +67 val_67 67 val_67 +67 val_67 67 val_67 +67 val_67 67 val_67 +67 val_67 67 val_67 +69 val_69 69 val_69 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +72 val_72 72 val_72 +72 val_72 72 val_72 +72 val_72 72 val_72 +72 val_72 72 val_72 +74 val_74 74 val_74 +76 val_76 76 val_76 +76 val_76 76 val_76 +76 val_76 76 val_76 +76 val_76 76 val_76 +77 val_77 77 val_77 +78 val_78 78 val_78 +8 val_8 8 val_8 +80 val_80 80 val_80 +82 val_82 82 val_82 +83 val_83 83 val_83 +83 val_83 83 val_83 +83 val_83 83 val_83 +83 val_83 83 val_83 +84 val_84 84 val_84 +84 val_84 84 val_84 +84 val_84 84 val_84 +84 val_84 84 val_84 +85 val_85 85 val_85 +86 val_86 86 val_86 +87 val_87 87 val_87 +9 val_9 9 val_9 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +92 val_92 92 val_92 +95 val_95 95 val_95 +95 val_95 95 val_95 
+95 val_95 95 val_95 +95 val_95 95 val_95 +96 val_96 96 val_96 +97 val_97 97 val_97 +97 val_97 97 val_97 +97 val_97 97 val_97 +97 val_97 97 val_97 +98 val_98 98 val_98 +98 val_98 98 val_98 +98 val_98 98 val_98 +98 val_98 98 val_98 diff --git ql/src/test/results/clientpositive/spark/join41.q.out ql/src/test/results/clientpositive/spark/join41.q.out new file mode 100644 index 0000000..d7c762d --- /dev/null +++ ql/src/test/results/clientpositive/spark/join41.q.out @@ -0,0 +1,175 @@ +PREHOOK: query: create table s1 as select * from src where key = 0 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@s1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: create table s1 as select * from src where key = 0 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@s1 +PREHOOK: query: EXPLAIN +SELECT * FROM s1 src1 LEFT OUTER JOIN s1 src2 ON (src1.key = src2.key AND src2.key > 10) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * FROM s1 src1 LEFT OUTER JOIN s1 src2 ON (src1.key = src2.key AND src2.key > 10) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key > 10) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 26 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 26 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 26 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM s1 src1 LEFT OUTER JOIN s1 src2 
ON (src1.key = src2.key AND src2.key > 10) +PREHOOK: type: QUERY +PREHOOK: Input: default@s1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM s1 src1 LEFT OUTER JOIN s1 src2 ON (src1.key = src2.key AND src2.key > 10) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s1 +#### A masked pattern was here #### +0 val_0 NULL NULL +0 val_0 NULL NULL +0 val_0 NULL NULL +PREHOOK: query: -- Make sure the big table is chosen correctly as part of HIVE-4146 +EXPLAIN +SELECT * FROM s1 src1 LEFT OUTER JOIN s1 src2 ON (src1.key = src2.key AND src2.key > 10) +PREHOOK: type: QUERY +POSTHOOK: query: -- Make sure the big table is chosen correctly as part of HIVE-4146 +EXPLAIN +SELECT * FROM s1 src1 LEFT OUTER JOIN s1 src2 ON (src1.key = src2.key AND src2.key > 10) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key > 10) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 26 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 26 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 26 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM s1 src1 LEFT OUTER JOIN s1 src2 ON (src1.key = src2.key AND src2.key > 10) +PREHOOK: type: QUERY +PREHOOK: Input: default@s1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM s1 src1 LEFT OUTER JOIN s1 src2 ON (src1.key = src2.key AND src2.key > 10) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@s1 +#### A masked pattern was here #### +0 val_0 NULL NULL +0 val_0 NULL NULL +0 val_0 NULL NULL diff --git ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out new file mode 
100644 index 0000000..97bd917 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out @@ -0,0 +1,673 @@ +PREHOOK: query: DROP TABLE part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@part +PREHOOK: query: explain select p1.p_name, p2.p_name +from part p1 , part p2 +PREHOOK: type: QUERY +POSTHOOK: query: explain select p1.p_name, p2.p_name +from part p1 , part p2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 31 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 31 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 31 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 31 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col1} + 1 {VALUE._col1} + outputColumnNames: _col1, _col13 + Statistics: Num rows: 34 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col13 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 34 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 34 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select p1.p_name, p2.p_name, p3.p_name +from part p1 ,part p2 ,part p3 +where p1.p_name = p2.p_name and p2.p_name = p3.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select p1.p_name, p2.p_name, p3.p_name +from part p1 ,part p2 ,part p3 +where p1.p_name = p2.p_name and p2.p_name = p3.p_name +POSTHOOK: type: 
QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 31 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 16 Data size: 1637 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 16 Data size: 1637 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 31 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 16 Data size: 1637 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 16 Data size: 1637 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 31 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 16 Data size: 1637 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 16 Data size: 1637 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + 2 {KEY.reducesinkkey0} + outputColumnNames: _col1, _col13, _col25 + Statistics: Num rows: 35 Data size: 3601 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 = _col13) and (_col13 = _col25)) (type: boolean) + Statistics: Num rows: 8 Data size: 823 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 823 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 823 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select p1.p_name, p2.p_name, p3.p_name +from part p1 , (select p_name from part) p2 ,part p3 +where p1.p_name = p2.p_name and p2.p_name = p3.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select p1.p_name, p2.p_name, p3.p_name +from part p1 , (select p_name from part) p2 ,part p3 +where p1.p_name = p2.p_name and p2.p_name = p3.p_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 
1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 31 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 16 Data size: 1637 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 1637 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 16 Data size: 1637 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 31 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 16 Data size: 1637 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 16 Data size: 1637 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 31 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 16 Data size: 1637 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 16 Data size: 1637 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + 2 {KEY.reducesinkkey0} + outputColumnNames: _col1, _col12, _col14 + Statistics: Num rows: 35 Data size: 3601 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 = _col12) and (_col12 = _col14)) (type: boolean) + Statistics: Num rows: 8 Data size: 823 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col12 (type: string), _col14 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 823 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 823 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select p1.p_name, p2.p_name, p3.p_name +from part p1 , part p2 , part p3 +where p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select p1.p_name, p2.p_name, p3.p_name +from part p1 , part p2 , part p3 +where p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP 
PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 31 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 16 Data size: 1637 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 16 Data size: 1637 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col12} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col12, _col13, _col25 + Statistics: Num rows: 17 Data size: 1800 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and (_col25 = _col13)) (type: boolean) + Statistics: Num rows: 4 Data size: 423 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 423 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 423 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col12, _col13 + Statistics: Num rows: 33 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 8 Data size: 846 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 8 Data size: 846 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select p1.p_name, p2.p_name, p3.p_name, p4.p_name +from part p1 , part p2 join part p3 on p2.p_name = 
p1.p_name join part p4 +where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey + and p1.p_partkey = p2.p_partkey +PREHOOK: type: QUERY +POSTHOOK: query: explain select p1.p_name, p2.p_name, p3.p_name, p4.p_name +from part p1 , part p2 join part p3 on p2.p_name = p1.p_name join part p4 +where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey + and p1.p_partkey = p2.p_partkey +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p4 + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 31 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 16 Data size: 1637 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 16 Data size: 1637 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 8 Data size: 846 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 8 Data size: 846 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 8 Data size: 846 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 8 Data size: 846 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col11} {VALUE._col12} {VALUE._col24} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col12, _col13, _col25, _col36, _col37 + Statistics: Num rows: 18 Data size: 1980 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col13 = _col25) and (_col0 = 
_col36)) and (_col0 = _col12)) (type: boolean) + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string), _col37 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col12} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col12, _col13, _col25 + Statistics: Num rows: 17 Data size: 1800 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 17 Data size: 1800 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col12 (type: int), _col13 (type: string), _col25 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col12, _col13 + Statistics: Num rows: 8 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 8 Data size: 930 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select p1.p_name, p2.p_name, p3.p_name, p4.p_name +from part p1 join part p2 on p2.p_name = p1.p_name , part p3 , part p4 +where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey + and p1.p_partkey = p2.p_partkey +PREHOOK: type: QUERY +POSTHOOK: query: explain select p1.p_name, p2.p_name, p3.p_name, p4.p_name +from part p1 join part p2 on p2.p_name = p1.p_name , part p3 , part p4 +where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey + and p1.p_partkey = p2.p_partkey +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p4 + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 15 Data size: 
1586 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 31 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 16 Data size: 1637 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 16 Data size: 1637 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 8 Data size: 846 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 8 Data size: 846 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 8 Data size: 846 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 8 Data size: 846 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col11} {VALUE._col12} {VALUE._col24} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col12, _col13, _col25, _col36, _col37 + Statistics: Num rows: 18 Data size: 1980 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string), _col37 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 220 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col12} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col12, _col13, _col25 + Statistics: Num rows: 17 Data size: 1800 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 17 Data size: 1800 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col12 (type: int), _col13 
(type: string), _col25 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col12, _col13 + Statistics: Num rows: 8 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 8 Data size: 930 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/join_array.q.out ql/src/test/results/clientpositive/spark/join_array.q.out new file mode 100644 index 0000000..66c143a --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_array.q.out @@ -0,0 +1,63 @@ +PREHOOK: query: create table tinyA(a bigint, b bigint) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tinyA +POSTHOOK: query: create table tinyA(a bigint, b bigint) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tinyA +PREHOOK: query: create table tinyB(a bigint, bList array<bigint>) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tinyB +POSTHOOK: query: create table tinyB(a bigint, bList array<bigint>) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tinyB +PREHOOK: query: load data local inpath '../../data/files/tiny_a.txt' into table tinyA +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tinya +POSTHOOK: query: load data local inpath '../../data/files/tiny_a.txt' into table tinyA +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tinya +PREHOOK: query: load data local inpath '../../data/files/tiny_b.txt' into table tinyB +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tinyb +POSTHOOK: query: load data local inpath '../../data/files/tiny_b.txt' into table tinyB +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tinyb +PREHOOK: query: select * from tinyA +PREHOOK: type: QUERY +PREHOOK: Input: default@tinya +#### A masked pattern was here #### +POSTHOOK: query: select * from tinyA +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tinya +#### A masked pattern was here #### +10320092026892491 3312 +PREHOOK: query: select * from tinyB +PREHOOK: type: QUERY +PREHOOK: Input: default@tinyb +#### A masked pattern was here #### +POSTHOOK: query: select * from tinyB +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tinyb +#### A masked pattern was here #### +10320092002467760 [0,23,37,48,53,55,55,56,60,66,72,76,77,78,80,81,87,88,90,90,91,90,92,97,100,103,104,107,108,108,109,110,113,113,113,113,113,113,114,116,116,116,117,116,117,117,117,115,115,117,117,117,121,120,131,131,131,125,125,124,124,128,128,131,131,132,133,134,134,134,134,26,26,null,null,null,null,116] +10320092026892491
[0,2,59,106,131,142,159,244,320,398,417,433,553,616,710,826,917,971,1046,1051,1093,1112,1142,1215,1220,1226,1232,1267,1364,1549,1646,1948,2170,2272,2325,2433,2534,2852,2925,2992,3119,3207,3279,3323,3412,3637,3645,3634,3450,3473,3638,3688,3736,3758,3812,3862,3873,3868,3883,4118,4134,4127,4170,4216,null,null,null,null,3139] +PREHOOK: query: select tinyB.a, tinyB.bList from tinyB full outer join tinyA on tinyB.a = tinyA.a +PREHOOK: type: QUERY +PREHOOK: Input: default@tinya +PREHOOK: Input: default@tinyb +#### A masked pattern was here #### +POSTHOOK: query: select tinyB.a, tinyB.bList from tinyB full outer join tinyA on tinyB.a = tinyA.a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tinya +POSTHOOK: Input: default@tinyb +#### A masked pattern was here #### +10320092002467760 [0,23,37,48,53,55,55,56,60,66,72,76,77,78,80,81,87,88,90,90,91,90,92,97,100,103,104,107,108,108,109,110,113,113,113,113,113,113,114,116,116,116,117,116,117,117,117,115,115,117,117,117,121,120,131,131,131,125,125,124,124,128,128,131,131,132,133,134,134,134,134,26,26,null,null,null,null,116] +10320092026892491 [0,2,59,106,131,142,159,244,320,398,417,433,553,616,710,826,917,971,1046,1051,1093,1112,1142,1215,1220,1226,1232,1267,1364,1549,1646,1948,2170,2272,2325,2433,2534,2852,2925,2992,3119,3207,3279,3323,3412,3637,3645,3634,3450,3473,3638,3688,3736,3758,3812,3862,3873,3868,3883,4118,4134,4127,4170,4216,null,null,null,null,3139] diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out new file mode 100644 index 0000000..78b37ca --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out @@ -0,0 +1,417 @@ +PREHOOK: query: DROP TABLE part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@part +PREHOOK: query: explain select * +from part p1 join part p2 join part p3 on p1.p_name = p2.p_name and p2.p_name = p3.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part p2 join part p3 on p1.p_name = p2.p_name and p2.p_name = p3.p_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 5 Data size: 3173 
Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 6 Data size: 4186 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 6 Data size: 4186 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 4186 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * +from part p1 join part p2 join part p3 on p2.p_name = p1.p_name and p3.p_name = p2.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part p2 join part p3 on p2.p_name = p1.p_name and p3.p_name = p2.p_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition 
map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 6 Data size: 4186 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 6 Data size: 4186 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 4186 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * +from part p1 join part p2 join part p3 on p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part p2 join part p3 on p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: 
string) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 3 Data size: 2093 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 3 Data size: 2093 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 2093 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} 
{VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 5 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 698 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 1 Data size: 698 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * +from part p1 join part p2 join part p3 on p2.p_partkey = 1 and p3.p_name = p2.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part p2 join part p3 on p2.p_partkey = 1 and p3.p_name = p2.p_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((p_partkey = 1) and p_name is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 634 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 634 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand 
(type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 5 Data size: 3839 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 5 Data size: 3839 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 3839 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 5 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 5 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out 
ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out new file mode 100644 index 0000000..0411dcd --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out @@ -0,0 +1,284 @@ +PREHOOK: query: DROP TABLE part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@part +PREHOOK: query: explain select * +from part p1 join part p2 join part p3 on p1.p_name = p2.p_name join part p4 on p2.p_name = p3.p_name and p1.p_name = p4.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part p2 join part p3 on p1.p_name = p2.p_name join part p4 on p2.p_name = p3.p_name and p1.p_name = p4.p_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p4 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + 
alias: p2 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 0 to 3 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 3 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 9 Data size: 6279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, 
_col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 9 Data size: 6279 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 6279 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * +from part p1 join part p2 join part p3 on p2.p_name = p1.p_name join part p4 on p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey + and p1.p_partkey = p2.p_partkey +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part p2 join part p3 on p2.p_name = p1.p_name join part p4 on p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey + and p1.p_partkey = p2.p_partkey +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p4 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 1269 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 2 Data size: 1269 Basic stats: COMPLETE Column stats: NONE + value expressions: p_mfgr (type: string), p_brand (type: 
string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 1269 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 2 Data size: 1269 Basic stats: COMPLETE Column stats: NONE + value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 3 Data size: 2302 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 3 Data size: 2302 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 2302 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + 
Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 3 Data size: 2093 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 2093 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 2 Data size: 1395 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 2 Data size: 1395 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out new file mode 100644 index 0000000..5b9799b --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out @@ -0,0 +1,437 @@ +PREHOOK: query: DROP TABLE part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: 
CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@part +PREHOOK: query: explain select * +from part p1 join part p2 join part p3 +where p1.p_name = p2.p_name and p2.p_name = p3.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part p2 join part p3 +where p1.p_name = p2.p_name and p2.p_name = p3.p_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) 
+ Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 6 Data size: 4186 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 = _col13) and (_col13 = _col25)) (type: boolean) + Statistics: Num rows: 1 Data size: 697 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 1 Data size: 697 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 697 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * +from part p1 join part p2 join part p3 +where p2.p_name = p1.p_name and p3.p_name = p2.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part p2 join part p3 +where p2.p_name = p1.p_name and p3.p_name = p2.p_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), 
p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 6 Data size: 4186 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col13 = _col1) and (_col25 = _col13)) (type: boolean) + Statistics: Num rows: 1 Data size: 697 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 1 Data size: 697 Basic stats: COMPLETE Column stats: NONE + 
File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 697 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * +from part p1 join part p2 join part p3 +where p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part p2 join part p3 +where p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, 
_col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 3 Data size: 2093 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and (_col25 = _col13)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 5 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 698 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 1 Data size: 698 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * +from part p1 join part p2 join part p3 +where p2.p_partkey = 1 and p3.p_name = p2.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part p2 join part p3 +where p2.p_partkey = 1 and p3.p_name = p2.p_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + 
+STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and (p_partkey = 1)) (type: boolean) + Statistics: Num rows: 1 Data size: 634 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 634 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 5 Data size: 3839 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col25 = _col13) (type: boolean) + Statistics: Num rows: 2 Data size: 1535 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 
(type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 2 Data size: 1535 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 1535 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 5 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 5 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out new file mode 100644 index 0000000..00ef044 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out @@ -0,0 +1,294 @@ +PREHOOK: query: DROP TABLE part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@part +PREHOOK: query: explain select * +from part p1 join part p2 
join part p3 on p1.p_name = p2.p_name join part p4 +where p2.p_name = p3.p_name and p1.p_name = p4.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part p2 join part p3 on p1.p_name = p2.p_name join part p4 +where p2.p_name = p3.p_name and p1.p_name = p4.p_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p4 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: 
string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 0 to 3 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 3 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 9 Data size: 6279 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col13 = _col25) and (_col1 = _col37)) (type: boolean) + Statistics: Num rows: 2 Data size: 1395 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 2 Data size: 1395 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 1395 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * +from part p1 join part p2 join part p3 on p2.p_name = p1.p_name join part p4 +where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey + and p1.p_partkey = p2.p_partkey +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part p2 join part p3 on p2.p_name = p1.p_name join part p4 +where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey + and p1.p_partkey = p2.p_partkey +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP 
PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p4 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 1269 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 2 Data size: 1269 Basic stats: COMPLETE Column stats: NONE + value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 1269 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 2 Data size: 1269 Basic stats: COMPLETE Column stats: NONE + value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} 
{VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 3 Data size: 2302 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 3 Data size: 2093 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 2093 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: 
string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 2 Data size: 1395 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 2 Data size: 1395 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out new file mode 100644 index 0000000..02afd2a --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out @@ -0,0 +1,473 @@ +PREHOOK: query: DROP TABLE part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@part +PREHOOK: query: create table part2( + p2_partkey INT, + p2_name STRING, + p2_mfgr STRING, + p2_brand STRING, + p2_type STRING, + p2_size INT, + p2_container STRING, + p2_retailprice DOUBLE, + p2_comment STRING +) +PREHOOK: type: CREATETABLE 
+PREHOOK: Output: database:default +PREHOOK: Output: default@part2 +POSTHOOK: query: create table part2( + p2_partkey INT, + p2_name STRING, + p2_mfgr STRING, + p2_brand STRING, + p2_type STRING, + p2_size INT, + p2_container STRING, + p2_retailprice DOUBLE, + p2_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part2 +PREHOOK: query: create table part3( + p3_partkey INT, + p3_name STRING, + p3_mfgr STRING, + p3_brand STRING, + p3_type STRING, + p3_size INT, + p3_container STRING, + p3_retailprice DOUBLE, + p3_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part3 +POSTHOOK: query: create table part3( + p3_partkey INT, + p3_name STRING, + p3_mfgr STRING, + p3_brand STRING, + p3_type STRING, + p3_size INT, + p3_container STRING, + p3_retailprice DOUBLE, + p3_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part3 +PREHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 on p1.p_name = p2_name and p2_name = p3_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 on p1.p_name = p2_name and p2_name = p3_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p2_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p2_name (type: string) + sort order: + + Map-reduce partition columns: p2_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: 
p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 6 Data size: 4186 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 6 Data size: 4186 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 4186 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 on p2_name = p1.p_name and p3_name = p2_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 on p2_name = p1.p_name and p3_name = p2_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), 
p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p2_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p2_name (type: string) + sort order: + + Map-reduce partition columns: p2_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 6 Data size: 4186 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 6 Data size: 4186 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 4186 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 on p2_partkey + p_partkey = p1.p_partkey and p3_name = p2_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 on p2_partkey + p_partkey = p1.p_partkey and p3_name = p2_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p2_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + 
Statistics: Num rows: 1 Data size: 767 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 1 Data size: 767 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 767 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 5 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 698 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 1 Data size: 698 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 on p2_partkey = 1 and p3_name = p2_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 on p2_partkey = 1 and p3_name = p2_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 0 Data 
size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: ((p2_partkey = 1) and p2_name is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 5 Data size: 3839 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 5 Data size: 3839 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 5 Data size: 3839 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 5 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 5 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out new file mode 100644 index 0000000..8f0390a --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out @@ -0,0 +1,340 @@ +PREHOOK: query: DROP TABLE part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@part +PREHOOK: query: create table part2( + p2_partkey INT, + p2_name STRING, + p2_mfgr STRING, + p2_brand STRING, + p2_type STRING, + p2_size INT, + p2_container STRING, + p2_retailprice DOUBLE, + p2_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part2 +POSTHOOK: query: create table part2( + p2_partkey INT, + p2_name STRING, + p2_mfgr STRING, + p2_brand STRING, + p2_type STRING, + p2_size INT, + p2_container STRING, + p2_retailprice DOUBLE, + p2_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@part2 +PREHOOK: query: create table part3( + p3_partkey INT, + p3_name STRING, + p3_mfgr STRING, + p3_brand STRING, + p3_type STRING, + p3_size INT, + p3_container STRING, + p3_retailprice DOUBLE, + p3_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part3 +POSTHOOK: query: create table part3( + p3_partkey INT, + p3_name STRING, + p3_mfgr STRING, + p3_brand STRING, + p3_type STRING, + p3_size INT, + p3_container STRING, + p3_retailprice DOUBLE, + p3_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part3 +PREHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 on p1.p_name = p2_name join part p4 on p2_name = p3_name and p1.p_name = p4.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 on p1.p_name = p2_name join part p4 on p2_name = p3_name and p1.p_name = p4.p_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p4 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p2_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p2_name (type: string) + sort order: + + Map-reduce partition columns: p2_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 5 + Map Operator 
Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 0 to 3 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 3 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 9 Data size: 6279 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 9 Data size: 6279 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 6279 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 on p2_name = p1.p_name join part p4 on p2_name = p3_name and p1.p_partkey = 
p4.p_partkey + and p1.p_partkey = p2_partkey +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 on p2_name = p1.p_name join part p4 on p2_name = p3_name and p1.p_partkey = p4.p_partkey + and p1.p_partkey = p2_partkey +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p4 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (p2_name is not null and p2_partkey is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p2_name (type: string), p2_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p2_name (type: string), p2_partkey (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 1269 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 2 Data size: 1269 Basic stats: COMPLETE Column stats: NONE + value expressions: 
p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 3 Data size: 2093 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 3 Data size: 2093 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 2093 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 2 Data size: 1534 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 1534 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 2 Data size: 1395 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 2 Data size: 1395 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out new file mode 100644 index 0000000..85de5db --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out @@ -0,0 +1,493 @@ +PREHOOK: query: DROP TABLE part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +#### A masked 
pattern was here #### +POSTHOOK: Output: default@part +PREHOOK: query: create table part2( + p2_partkey INT, + p2_name STRING, + p2_mfgr STRING, + p2_brand STRING, + p2_type STRING, + p2_size INT, + p2_container STRING, + p2_retailprice DOUBLE, + p2_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part2 +POSTHOOK: query: create table part2( + p2_partkey INT, + p2_name STRING, + p2_mfgr STRING, + p2_brand STRING, + p2_type STRING, + p2_size INT, + p2_container STRING, + p2_retailprice DOUBLE, + p2_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part2 +PREHOOK: query: create table part3( + p3_partkey INT, + p3_name STRING, + p3_mfgr STRING, + p3_brand STRING, + p3_type STRING, + p3_size INT, + p3_container STRING, + p3_retailprice DOUBLE, + p3_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part3 +POSTHOOK: query: create table part3( + p3_partkey INT, + p3_name STRING, + p3_mfgr STRING, + p3_brand STRING, + p3_type STRING, + p3_size INT, + p3_container STRING, + p3_retailprice DOUBLE, + p3_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part3 +PREHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 +where p1.p_name = p2_name and p2_name = p3_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 +where p1.p_name = p2_name and p2_name = p3_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p2_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p2_name (type: string) + sort order: + + Map-reduce partition columns: p2_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num 
rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 6 Data size: 4186 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 = _col13) and (_col13 = _col25)) (type: boolean) + Statistics: Num rows: 1 Data size: 697 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 1 Data size: 697 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 697 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 +where p2_name = p1.p_name and p3_name = p2_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 +where p2_name = p1.p_name and p3_name = p2_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 0 
Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p2_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p2_name (type: string) + sort order: + + Map-reduce partition columns: p2_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 6 Data size: 4186 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col13 = _col1) and (_col25 = _col13)) (type: boolean) + Statistics: Num rows: 1 Data size: 697 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: 
string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 1 Data size: 697 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 697 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 +where p2_partkey + p1.p_partkey = p1.p_partkey and p3_name = p2_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 +where p2_partkey + p1.p_partkey = p1.p_partkey and p3_name = p2_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p2_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 
{VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 1 Data size: 767 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and (_col25 = _col13)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 5 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 698 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 1 Data size: 698 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + + Stage: 
Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 +where p2_partkey = 1 and p3_name = p2_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 +where p2_partkey = 1 and p3_name = p2_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (p2_name is not null and (p2_partkey = 1)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 5 Data size: 3839 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col25 = _col13) (type: boolean) + Statistics: Num rows: 2 Data size: 1535 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 2 Data size: 1535 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 1535 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 5 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 5 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out new file mode 100644 index 0000000..6e59ff2 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out @@ -0,0 +1,350 @@ +PREHOOK: query: DROP TABLE part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@part +PREHOOK: query: create table part2( + p2_partkey INT, + p2_name STRING, + p2_mfgr STRING, + p2_brand STRING, + p2_type STRING, + p2_size INT, + p2_container STRING, + p2_retailprice DOUBLE, + p2_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part2 +POSTHOOK: query: create table part2( + p2_partkey INT, + p2_name STRING, + p2_mfgr STRING, + p2_brand STRING, + p2_type STRING, + p2_size INT, + p2_container STRING, + p2_retailprice DOUBLE, + p2_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part2 +PREHOOK: query: create table part3( + p3_partkey INT, + p3_name STRING, + p3_mfgr STRING, + p3_brand STRING, + p3_type STRING, + p3_size INT, + p3_container STRING, + p3_retailprice DOUBLE, + p3_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part3 +POSTHOOK: query: create table part3( + p3_partkey INT, + p3_name STRING, + p3_mfgr STRING, + p3_brand STRING, + p3_type STRING, + p3_size INT, + p3_container STRING, + p3_retailprice DOUBLE, + p3_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part3 +PREHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 on p1.p_name = p2_name join part p4 +where p2_name = p3_name and p1.p_name = p4.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 on p1.p_name = p2_name join part p4 +where p2_name = p3_name and p1.p_name = p4.p_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p4 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p2_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p2_name (type: string) + sort order: + + Map-reduce partition columns: p2_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 0 to 3 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 3 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 9 Data size: 6279 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col13 = _col25) and (_col1 = _col37)) (type: boolean) + Statistics: Num rows: 2 Data size: 1395 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: 
string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 2 Data size: 1395 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 1395 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 on p2_name = p1.p_name join part p4 +where p2_name = p3_name and p1.p_partkey = p4.p_partkey + and p1.p_partkey = p2_partkey +PREHOOK: type: QUERY +POSTHOOK: query: explain select * +from part p1 join part2 p2 join part3 p3 on p2_name = p1.p_name join part p4 +where p2_name = p3_name and p1.p_partkey = p4.p_partkey + and p1.p_partkey = p2_partkey +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p4 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 3 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (p2_name is not null and p2_partkey is not 
null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p2_name (type: string), p2_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p2_name (type: string), p2_partkey (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 5 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 1269 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 2 Data size: 1269 Basic stats: COMPLETE Column stats: NONE + value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 3 Data size: 2093 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, 
_col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 2 Data size: 1534 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 1534 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 2 Data size: 1395 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 2 Data size: 1395 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git 
ql/src/test/results/clientpositive/spark/join_empty.q.out ql/src/test/results/clientpositive/spark/join_empty.q.out new file mode 100644 index 0000000..344345b --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_empty.q.out @@ -0,0 +1,56 @@ +PREHOOK: query: create table srcpart_empty(key int, value string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_empty +POSTHOOK: query: create table srcpart_empty(key int, value string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_empty +PREHOOK: query: create table src2_empty (key int, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src2_empty +POSTHOOK: query: create table src2_empty (key int, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src2_empty +PREHOOK: query: select /*+mapjoin(a)*/ a.key, b.value from srcpart_empty a join src b on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart_empty +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a)*/ a.key, b.value from srcpart_empty a join src b on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart_empty +#### A masked pattern was here #### +PREHOOK: query: select /*+mapjoin(a)*/ a.key, b.value from src2_empty a join src b on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src2_empty +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a)*/ a.key, b.value from src2_empty a join src b on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src2_empty +#### A masked pattern was here #### +PREHOOK: query: select a.key, b.value from srcpart_empty a join src b on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart_empty +#### A masked pattern was here #### +POSTHOOK: query: select a.key, b.value from srcpart_empty a join src b on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart_empty +#### A masked pattern was here #### +PREHOOK: query: select a.key, b.value from src2_empty a join src b on a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src2_empty +#### A masked pattern was here #### +POSTHOOK: query: select a.key, b.value from src2_empty a join src b on a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src2_empty +#### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/join_filters.q.out ql/src/test/results/clientpositive/spark/join_filters.q.out new file mode 100644 index 0000000..64d858b --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_filters.q.out @@ -0,0 +1,1352 @@ +PREHOOK: query: -- SORT_AND_HASH_QUERY_RESULTS + +CREATE TABLE myinput1(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@myinput1 +POSTHOOK: query: -- SORT_AND_HASH_QUERY_RESULTS + +CREATE TABLE myinput1(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@myinput1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1 +PREHOOK: type: LOAD +#### A masked pattern was here #### 
+PREHOOK: Output: default@myinput1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@myinput1 +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +12 35 NULL NULL +48 NULL NULL NULL +NULL 40 NULL NULL +M3MWtBJdRXSWIJY5Qr/otw== +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +NULL NULL 12 35 +NULL NULL 48 NULL +NULL NULL NULL 40 +LNZKrcVNAvaeDALnsg72bw== +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +12 35 NULL NULL +48 NULL NULL NULL +NULL 40 NULL NULL +NULL NULL 12 35 +NULL NULL 48 NULL +NULL NULL NULL 40 +JIHZKZaNhNR9LYBcRFyxng== +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 
+#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +12 35 NULL NULL +48 NULL NULL NULL +NULL 40 NULL NULL +M3MWtBJdRXSWIJY5Qr/otw== +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +12 35 NULL NULL +48 NULL NULL NULL +NULL 40 NULL NULL +M3MWtBJdRXSWIJY5Qr/otw== +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +12 35 NULL NULL +48 NULL NULL NULL 
+NULL 40 NULL NULL +M3MWtBJdRXSWIJY5Qr/otw== +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +12 35 NULL NULL +48 NULL NULL NULL +NULL 40 NULL NULL +M3MWtBJdRXSWIJY5Qr/otw== +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +NULL NULL 12 35 +NULL NULL 48 NULL +NULL NULL NULL 40 +LNZKrcVNAvaeDALnsg72bw== +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +NULL NULL 12 35 +NULL NULL 48 NULL +NULL NULL NULL 40 +LNZKrcVNAvaeDALnsg72bw== +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +NULL NULL 12 35 +NULL NULL 48 NULL +NULL NULL NULL 40 +LNZKrcVNAvaeDALnsg72bw== +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +NULL NULL 12 35 +NULL NULL 48 NULL +NULL NULL NULL 40 +LNZKrcVNAvaeDALnsg72bw== +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON 
a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +12 35 NULL NULL +48 NULL NULL NULL +NULL 40 NULL NULL +NULL NULL 12 35 +NULL NULL 48 NULL +NULL NULL NULL 40 +JIHZKZaNhNR9LYBcRFyxng== +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +12 35 NULL NULL +48 NULL NULL NULL +NULL 40 NULL NULL +NULL NULL 12 35 +NULL NULL 48 NULL +NULL NULL NULL 40 +JIHZKZaNhNR9LYBcRFyxng== +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +12 35 NULL NULL +48 NULL NULL NULL +NULL 40 NULL NULL +NULL NULL 12 35 +NULL NULL 48 NULL +NULL NULL NULL 40 +JIHZKZaNhNR9LYBcRFyxng== +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +12 35 NULL NULL +48 NULL NULL NULL +NULL 40 NULL NULL +NULL NULL 12 35 +NULL NULL 48 NULL +NULL NULL NULL 40 +JIHZKZaNhNR9LYBcRFyxng== +PREHOOK: query: SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND 
c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 100 100 +NULL NULL NULL NULL 12 35 +NULL NULL NULL NULL 48 NULL +NULL NULL NULL NULL NULL 40 +AzUxen/yR7DlsL00zfSITA== +PREHOOK: query: SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 100 100 +NULL NULL 12 35 NULL NULL +NULL NULL 48 NULL NULL NULL +NULL NULL NULL 40 NULL NULL +BYad/CYbc/RASCgl63S7Ww== +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 100 100 +NULL NULL NULL NULL 12 35 +NULL NULL NULL NULL 48 NULL +NULL NULL NULL NULL NULL 40 +AzUxen/yR7DlsL00zfSITA== +PREHOOK: query: SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 100 100 +NULL NULL NULL NULL 12 35 +NULL NULL NULL NULL 48 NULL +NULL NULL NULL NULL NULL 40 +AzUxen/yR7DlsL00zfSITA== +PREHOOK: query: SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND 
c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 100 100 +NULL NULL 12 35 NULL NULL +NULL NULL 48 NULL NULL NULL +NULL NULL NULL 40 NULL NULL +BYad/CYbc/RASCgl63S7Ww== +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 100 100 +NULL NULL NULL NULL 12 35 +NULL NULL NULL NULL 48 NULL +NULL NULL NULL NULL NULL 40 +AzUxen/yR7DlsL00zfSITA== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a 
JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND 
b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +12 35 NULL NULL +48 NULL NULL NULL +NULL 40 NULL NULL +M3MWtBJdRXSWIJY5Qr/otw== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +12 35 NULL NULL +48 NULL NULL NULL +NULL 40 NULL NULL +M3MWtBJdRXSWIJY5Qr/otw== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +12 35 NULL NULL +48 NULL NULL NULL +NULL 40 NULL NULL +M3MWtBJdRXSWIJY5Qr/otw== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked 
pattern was here #### +100 100 100 100 +NULL NULL 12 35 +NULL NULL 48 NULL +NULL NULL NULL 40 +LNZKrcVNAvaeDALnsg72bw== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +NULL NULL 12 35 +NULL NULL 48 NULL +NULL NULL NULL 40 +LNZKrcVNAvaeDALnsg72bw== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +NULL NULL 12 35 +NULL NULL 48 NULL +NULL NULL NULL 40 +LNZKrcVNAvaeDALnsg72bw== +PREHOOK: query: CREATE TABLE smb_input1(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_input1 +POSTHOOK: query: CREATE TABLE smb_input1(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_input1 +PREHOOK: query: CREATE TABLE smb_input2(key int, value int) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_input2 +POSTHOOK: query: CREATE TABLE smb_input2(key int, value int) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_input2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' into table smb_input1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_input1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' into table smb_input1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_input1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in2.txt' into table smb_input1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_input1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in2.txt' into table smb_input1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_input1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' into table smb_input2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_input2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' into table smb_input2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_input2 +PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/in2.txt' into table smb_input2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_input2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in2.txt' into table smb_input2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_input2 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key AND a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key AND a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 
50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a JOIN smb_input2 b ON a.key = b.key AND a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a JOIN smb_input2 b ON a.key = b.key AND a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +100 100 100 100 +148 NULL NULL NULL +200 200 200 200 +48 NULL NULL NULL +NULL 135 NULL NULL +NULL 35 NULL NULL +t2boI39B33IANcqlsXXA3g== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input2 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input2 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 
AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +148 NULL NULL NULL +200 200 200 200 +48 NULL NULL NULL +NULL 135 NULL NULL +NULL 35 NULL NULL +t2boI39B33IANcqlsXXA3g== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a LEFT OUTER JOIN smb_input2 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a LEFT OUTER JOIN smb_input2 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +148 NULL NULL NULL +200 200 200 200 +48 NULL NULL NULL +NULL 135 NULL NULL +NULL 35 NULL NULL +t2boI39B33IANcqlsXXA3g== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +NULL NULL 148 NULL +NULL NULL 48 NULL +NULL NULL NULL 135 +NULL NULL NULL 35 +UBr9lyqgsjDFvooMgQlZ9w== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input2 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input2 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +NULL NULL 148 NULL +NULL NULL 48 NULL +NULL NULL NULL 135 +NULL NULL NULL 35 +UBr9lyqgsjDFvooMgQlZ9w== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a RIGHT OUTER JOIN smb_input2 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a RIGHT OUTER JOIN smb_input2 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +NULL NULL 148 NULL +NULL NULL 48 NULL +NULL NULL NULL 135 +NULL NULL NULL 35 +UBr9lyqgsjDFvooMgQlZ9w== +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 
AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY 
+PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM 
myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND 
b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 100 100 +WPjT9iK+FjpywFhoiQ0jvw== +PREHOOK: query: SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 100 100 +WPjT9iK+FjpywFhoiQ0jvw== +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 
AND c.value > 50 AND c.key = c.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 100 100 +WPjT9iK+FjpywFhoiQ0jvw== +PREHOOK: query: SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 100 100 +WPjT9iK+FjpywFhoiQ0jvw== +PREHOOK: query: SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 100 100 +WPjT9iK+FjpywFhoiQ0jvw== +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 100 100 +WPjT9iK+FjpywFhoiQ0jvw== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ 
MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND 
a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = 
a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +YaI1msgLVpfEx943Tfea/Q== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input2 b ON 
a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key AND a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key AND a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND 
b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a JOIN smb_input2 b ON a.key = b.key AND a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a JOIN smb_input2 b ON a.key = b.key AND a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input2 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input2 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a LEFT OUTER JOIN smb_input2 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a LEFT OUTER JOIN smb_input2 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: 
QUERY +PREHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input2 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input2 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a RIGHT OUTER JOIN smb_input2 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a RIGHT OUTER JOIN smb_input2 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +fRHAbs52npY0rAF1I1Npfg== diff --git ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out new file mode 100644 index 0000000..78c2b69 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out @@ -0,0 +1,2010 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- HIVE-3411 Filter predicates on outer join overlapped on single alias is not handled properly + +create table a as SELECT 100 as key, a.value as value FROM src LATERAL VIEW explode(array(40, 50, 60)) a as value limit 3 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@a +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- HIVE-3411 Filter predicates on outer join overlapped on single alias is not handled properly + +create table a as SELECT 100 as key, a.value as value FROM src LATERAL VIEW explode(array(40, 50, 60)) a as value limit 3 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: -- overlap on a +explain extended select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) +PREHOOK: type: QUERY +POSTHOOK: query: -- overlap on a +explain extended select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND 
a.value=60 AND c.value=60) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_LEFTOUTERJOIN + TOK_LEFTOUTERJOIN + TOK_TABREF + TOK_TABNAME + a + TOK_TABREF + TOK_TABNAME + a + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + value + 50 + = + . + TOK_TABLE_OR_COL + b + value + 50 + TOK_TABREF + TOK_TABNAME + a + c + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + c + key + = + . + TOK_TABLE_OR_COL + a + value + 60 + = + . + TOK_TABLE_OR_COL + c + value + 60 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 50) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: int) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + name: default.a + Truncated Path -> Alias: + /a [b] + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 60) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: 
NONE + tag: 2 + value expressions: value (type: int) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + name: default.a + Truncated Path -> Alias: + /a [c] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: int) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + name: default.a + Truncated Path -> Alias: + /a [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + condition 
expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter mappings: + 0 [1, 1, 2, 1] + filter predicates: + 0 {(VALUE._col0 = 50)} {(VALUE._col0 = 60)} + 1 + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int:int:int:int:int:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) +PREHOOK: type: QUERY +PREHOOK: Input: default@a +#### A masked pattern was here #### +POSTHOOK: query: select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +#### A masked pattern was here #### +100 40 NULL NULL NULL NULL +100 50 100 50 NULL NULL +100 60 NULL NULL 100 60 +PREHOOK: query: select /*+ MAPJOIN(b,c)*/ * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) +PREHOOK: type: QUERY +PREHOOK: Input: default@a +#### A masked pattern was here #### +POSTHOOK: query: select /*+ MAPJOIN(b,c)*/ * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +#### A masked pattern was here #### +100 40 NULL NULL NULL NULL +100 50 100 50 NULL NULL +100 60 NULL NULL 100 60 +PREHOOK: query: -- overlap on b +explain extended select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) +PREHOOK: type: QUERY +POSTHOOK: query: -- overlap on b +explain extended select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_LEFTOUTERJOIN + TOK_RIGHTOUTERJOIN + TOK_TABREF + TOK_TABNAME + a + TOK_TABREF + TOK_TABNAME + a + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + value + 50 + = + . 
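The first plan above ("overlap on a") is the crux of the HIVE-3411 fix: b.value=50 and c.value=60 are pushed down as map-side Filter Operators because b and c sit on the null-supplying side of their LEFT OUTER joins, while a.value=50 and a.value=60 have to survive as join-time "filter predicates" on alias 0, since a is the row-preserving side and those predicates may only suppress a match, never drop a's row. The MAPJOIN(b,c) variant returns the same three rows, confirming the hint changes only the execution strategy. A minimal HiveQL sketch of why the a-side predicate cannot be pushed below the join (illustrative only, reusing the test's table a):

-- ON-clause filter on the preserved side: every row of a survives;
-- b's columns are NULL-padded whenever a.value <> 50.
select * from a left outer join a b on (a.key = b.key and a.value = 50);

-- WHERE-clause filter: rows of a with value <> 50 are dropped outright,
-- so pushing the ON-clause predicate below the outer join would change the result.
select * from a left outer join a b on (a.key = b.key) where a.value = 50;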
+ TOK_TABLE_OR_COL + b + value + 50 + TOK_TABREF + TOK_TABNAME + a + c + AND + AND + = + . + TOK_TABLE_OR_COL + b + key + . + TOK_TABLE_OR_COL + c + key + = + . + TOK_TABLE_OR_COL + b + value + 60 + = + . + TOK_TABLE_OR_COL + c + value + 60 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: int) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + name: default.a + Truncated Path -> Alias: + /a [b] + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 60) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + tag: 2 + value expressions: value (type: int) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern 
was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + name: default.a + Truncated Path -> Alias: + /a [c] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 50) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: int) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + name: default.a + Truncated Path -> Alias: + /a [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter mappings: + 1 [0, 1, 2, 1] + filter predicates: + 0 + 1 {(VALUE._col0 = 50)} {(VALUE._col0 = 60)} + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 
46 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int:int:int:int:int:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) +PREHOOK: type: QUERY +PREHOOK: Input: default@a +#### A masked pattern was here #### +POSTHOOK: query: select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +#### A masked pattern was here #### +100 50 100 50 NULL NULL +NULL NULL 100 40 NULL NULL +NULL NULL 100 60 100 60 +PREHOOK: query: select /*+ MAPJOIN(a,c)*/ * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) +PREHOOK: type: QUERY +PREHOOK: Input: default@a +#### A masked pattern was here #### +POSTHOOK: query: select /*+ MAPJOIN(a,c)*/ * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +#### A masked pattern was here #### +100 50 100 50 NULL NULL +NULL NULL 100 40 NULL NULL +NULL NULL 100 60 100 60 +PREHOOK: query: -- overlap on b with two filters for each +explain extended select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60) +PREHOOK: type: QUERY +POSTHOOK: query: -- overlap on b with two filters for each +explain extended select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_LEFTOUTERJOIN + TOK_RIGHTOUTERJOIN + TOK_TABREF + TOK_TABNAME + a + TOK_TABREF + TOK_TABNAME + a + b + AND + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + value + 50 + = + . + TOK_TABLE_OR_COL + b + value + 50 + > + . + TOK_TABLE_OR_COL + b + value + 10 + TOK_TABREF + TOK_TABNAME + a + c + AND + AND + AND + = + . + TOK_TABLE_OR_COL + b + key + . + TOK_TABLE_OR_COL + c + key + = + . + TOK_TABLE_OR_COL + b + value + 60 + > + . + TOK_TABLE_OR_COL + b + value + 20 + = + . 
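In the RIGHT OUTER variant above, the overlap follows the preserved side: the filter mappings and both predicates now attach to alias 1 (b), while a and c, now null-supplying, each get an ordinary map-side Filter Operator. My reading of the mapping format (a hypothetical decoding, as pairs of other-join tag and filter count):

-- filter mappings: 1 [0, 1, 2, 1]
--   alias 1 (b) carries 1 predicate guarding the join with tag 0 (a): (VALUE._col0 = 50)
--   and 1 predicate guarding the join with tag 2 (c): (VALUE._col0 = 60),
--   listed in that order under "filter predicates".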
+ TOK_TABLE_OR_COL + c + value + 60 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: int) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + name: default.a + Truncated Path -> Alias: + /a [b] + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 60) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + tag: 2 + value expressions: value (type: int) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + name: default.a + Truncated Path -> Alias: + /a [c] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 50) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: int) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + name: default.a + Truncated Path -> Alias: + /a [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter mappings: + 1 [0, 2, 2, 2] + filter predicates: + 0 + 1 {(VALUE._col0 = 50)} {(VALUE._col0 > 10)} {(VALUE._col0 = 60)} {(VALUE._col0 > 20)} + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: 
int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int:int:int:int:int:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60) +PREHOOK: type: QUERY +PREHOOK: Input: default@a +#### A masked pattern was here #### +POSTHOOK: query: select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +#### A masked pattern was here #### +100 50 100 50 NULL NULL +NULL NULL 100 40 NULL NULL +NULL NULL 100 60 100 60 +PREHOOK: query: select /*+ MAPJOIN(a,c)*/ * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60) +PREHOOK: type: QUERY +PREHOOK: Input: default@a +#### A masked pattern was here #### +POSTHOOK: query: select /*+ MAPJOIN(a,c)*/ * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +#### A masked pattern was here #### +100 50 100 50 NULL NULL +NULL NULL 100 40 NULL NULL +NULL NULL 100 60 100 60 +PREHOOK: query: -- overlap on a, b +explain extended select * from a full outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40) +PREHOOK: type: QUERY +POSTHOOK: query: -- overlap on a, b +explain extended select * from a full outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_LEFTOUTERJOIN + TOK_LEFTOUTERJOIN + TOK_FULLOUTERJOIN + TOK_TABREF + TOK_TABNAME + a + TOK_TABREF + TOK_TABNAME + a + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + value + 50 + = + . + TOK_TABLE_OR_COL + b + value + 50 + TOK_TABREF + TOK_TABNAME + a + c + AND + AND + = + . + TOK_TABLE_OR_COL + b + key + . + TOK_TABLE_OR_COL + c + key + = + . + TOK_TABLE_OR_COL + b + value + 60 + = + . 
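The "two filters for each" plan above appears to confirm that pairing: the mapping becomes 1 [0, 2, 2, 2], and the four predicates listed for alias 1 split as {(VALUE._col0 = 50)} {(VALUE._col0 > 10)} guarding the join with a (tag 0) and {(VALUE._col0 = 60)} {(VALUE._col0 > 20)} guarding the join with c (tag 2).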
+ TOK_TABLE_OR_COL + c + value + 60 + TOK_TABREF + TOK_TABNAME + a + d + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + d + key + = + . + TOK_TABLE_OR_COL + a + value + 40 + = + . + TOK_TABLE_OR_COL + d + value + 40 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 40) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + tag: 3 + value expressions: value (type: int) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + name: default.a + Truncated Path -> Alias: + /a [d] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: int) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + 
columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + name: default.a + Truncated Path -> Alias: + /a [b] + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 60) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + tag: 2 + value expressions: value (type: int) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + name: default.a + Truncated Path -> Alias: + /a [c] + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: int) + auto parallelism: true + Path -> Alias: 
+#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + name: default.a + Truncated Path -> Alias: + /a [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + Left Outer Join1 to 2 + Left Outer Join0 to 3 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + 3 {KEY.reducesinkkey0} {VALUE._col0} + filter mappings: + 0 [1, 1, 3, 1] + 1 [0, 1, 2, 1] + filter predicates: + 0 {(VALUE._col0 = 50)} {(VALUE._col0 = 40)} + 1 {(VALUE._col0 = 50)} {(VALUE._col0 = 60)} + 2 + 3 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int), _col15 (type: int), _col16 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types int:int:int:int:int:int:int:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from a full outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40) 
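The "filter mappings" / "filter predicates" entries in the Join Operator above are how Hive handles ON-clause predicates that reference only one side of an outer join: applying such a predicate before the join would change outer-join semantics, so it is retained as a residual filter evaluated inside the join, keyed by input tag. Predicates on the null-supplying side (here c.value = 60 and d.value = 40) can still be pushed down into the table scans, which is why those map vertices carry Filter Operators. A minimal sketch of the pattern, using a hypothetical table t with the same shape as the test table a(key int, value int):

    CREATE TABLE t (key INT, value INT);
    -- t1.value = 50 touches only the preserved side, so it survives as a
    -- join-time "filter predicate"; t2.value = 50 is on the null-supplying
    -- side and can be pushed into t2's scan as a Filter Operator.
    EXPLAIN
    SELECT *
    FROM t t1
    LEFT OUTER JOIN t t2
      ON (t1.key = t2.key AND t1.value = 50 AND t2.value = 50);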
+PREHOOK: type: QUERY +PREHOOK: Input: default@a +#### A masked pattern was here #### +POSTHOOK: query: select * from a full outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +#### A masked pattern was here #### +100 40 NULL NULL NULL NULL 100 40 +100 50 100 50 NULL NULL NULL NULL +100 60 NULL NULL NULL NULL NULL NULL +NULL NULL 100 40 NULL NULL NULL NULL +NULL NULL 100 60 100 60 NULL NULL +PREHOOK: query: -- triple overlap on a +explain extended select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40) +PREHOOK: type: QUERY +POSTHOOK: query: -- triple overlap on a +explain extended select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_LEFTOUTERJOIN + TOK_LEFTOUTERJOIN + TOK_LEFTOUTERJOIN + TOK_TABREF + TOK_TABNAME + a + TOK_TABREF + TOK_TABNAME + a + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + value + 50 + = + . + TOK_TABLE_OR_COL + b + value + 50 + TOK_TABREF + TOK_TABNAME + a + c + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + c + key + = + . + TOK_TABLE_OR_COL + a + value + 60 + = + . + TOK_TABLE_OR_COL + c + value + 60 + TOK_TABREF + TOK_TABNAME + a + d + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + d + key + = + . + TOK_TABLE_OR_COL + a + value + 40 + = + . 
+ TOK_TABLE_OR_COL + d + value + 40 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 40) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + tag: 3 + value expressions: value (type: int) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + name: default.a + Truncated Path -> Alias: + /a [d] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 50) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: int) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern 
was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + name: default.a + Truncated Path -> Alias: + /a [b] + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 60) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + tag: 2 + value expressions: value (type: int) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + name: default.a + Truncated Path -> Alias: + /a [c] + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: int) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path 
-> Partition: +#### A masked pattern was here #### + Partition + base file name: a + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct a { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a + name: default.a + Truncated Path -> Alias: + /a [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + Left Outer Join0 to 3 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + 3 {KEY.reducesinkkey0} {VALUE._col0} + filter mappings: + 0 [1, 1, 2, 1, 3, 1] + filter predicates: + 0 {(VALUE._col0 = 50)} {(VALUE._col0 = 60)} {(VALUE._col0 = 40)} + 1 + 2 + 3 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int), _col15 (type: int), _col16 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types int:int:int:int:int:int:int:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40) +PREHOOK: type: QUERY +PREHOOK: Input: default@a +#### A masked pattern was here 
#### +POSTHOOK: query: select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +#### A masked pattern was here #### +100 40 NULL NULL NULL NULL 100 40 +100 50 100 50 NULL NULL NULL NULL +100 60 NULL NULL 100 60 NULL NULL +PREHOOK: query: select /*+ MAPJOIN(b,c, d)*/ * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40) +PREHOOK: type: QUERY +PREHOOK: Input: default@a +#### A masked pattern was here #### +POSTHOOK: query: select /*+ MAPJOIN(b,c, d)*/ * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) left outer join a d on (a.key=d.key AND a.value=40 AND d.value=40) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +#### A masked pattern was here #### +100 40 NULL NULL NULL NULL 100 40 +100 50 100 50 NULL NULL NULL NULL +100 60 NULL NULL 100 60 NULL NULL diff --git ql/src/test/results/clientpositive/spark/join_hive_626.q.out ql/src/test/results/clientpositive/spark/join_hive_626.q.out new file mode 100644 index 0000000..e2a12a6 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_hive_626.q.out @@ -0,0 +1,179 @@ +PREHOOK: query: create table hive_foo (foo_id int, foo_name string, foo_a string, foo_b string, +foo_c string, foo_d string) row format delimited fields terminated by ',' +stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hive_foo +POSTHOOK: query: create table hive_foo (foo_id int, foo_name string, foo_a string, foo_b string, +foo_c string, foo_d string) row format delimited fields terminated by ',' +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hive_foo +PREHOOK: query: create table hive_bar (bar_id int, bar_0 int, foo_id int, bar_1 int, bar_name +string, bar_a string, bar_b string, bar_c string, bar_d string) row format +delimited fields terminated by ',' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hive_bar +POSTHOOK: query: create table hive_bar (bar_id int, bar_0 int, foo_id int, bar_1 int, bar_name +string, bar_a string, bar_b string, bar_c string, bar_d string) row format +delimited fields terminated by ',' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hive_bar +PREHOOK: query: create table hive_count (bar_id int, n int) row format delimited fields +terminated by ',' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hive_count +POSTHOOK: query: create table hive_count (bar_id int, n int) row format delimited fields +terminated by ',' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hive_count +PREHOOK: query: load data local inpath '../../data/files/hive_626_foo.txt' overwrite into table hive_foo +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@hive_foo +POSTHOOK: query: load data local inpath '../../data/files/hive_626_foo.txt' overwrite into table hive_foo +POSTHOOK: type: LOAD +#### A masked pattern was here 
#### +POSTHOOK: Output: default@hive_foo +PREHOOK: query: load data local inpath '../../data/files/hive_626_bar.txt' overwrite into table hive_bar +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@hive_bar +POSTHOOK: query: load data local inpath '../../data/files/hive_626_bar.txt' overwrite into table hive_bar +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@hive_bar +PREHOOK: query: load data local inpath '../../data/files/hive_626_count.txt' overwrite into table hive_count +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@hive_count +POSTHOOK: query: load data local inpath '../../data/files/hive_626_count.txt' overwrite into table hive_count +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@hive_count +PREHOOK: query: explain +select hive_foo.foo_name, hive_bar.bar_name, n from hive_foo join hive_bar on hive_foo.foo_id = +hive_bar.foo_id join hive_count on hive_count.bar_id = hive_bar.bar_id +PREHOOK: type: QUERY +POSTHOOK: query: explain +select hive_foo.foo_name, hive_bar.bar_name, n from hive_foo join hive_bar on hive_foo.foo_id = +hive_bar.foo_id join hive_count on hive_count.bar_id = hive_bar.bar_id +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: hive_foo + Statistics: Num rows: 0 Data size: 15 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: foo_id is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: foo_id (type: int) + sort order: + + Map-reduce partition columns: foo_id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: foo_name (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: hive_count + Statistics: Num rows: 0 Data size: 5 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: bar_id is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: bar_id (type: int) + sort order: + + Map-reduce partition columns: bar_id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: n (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: hive_bar + Statistics: Num rows: 0 Data size: 23 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (foo_id is not null and bar_id is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: foo_id (type: int) + sort order: + + Map-reduce partition columns: foo_id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: bar_id (type: int), bar_name (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col0} {VALUE._col3} + outputColumnNames: _col1, _col9, _col13 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE + Reduce Output Operator + key expressions: _col9 (type: int) + sort order: + + Map-reduce partition columns: _col9 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string), _col13 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col1} {VALUE._col12} + 1 {VALUE._col0} + outputColumnNames: _col1, _col13, _col22 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col13 (type: string), _col22 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select hive_foo.foo_name, hive_bar.bar_name, n from hive_foo join hive_bar on hive_foo.foo_id = +hive_bar.foo_id join hive_count on hive_count.bar_id = hive_bar.bar_id +PREHOOK: type: QUERY +PREHOOK: Input: default@hive_bar +PREHOOK: Input: default@hive_count +PREHOOK: Input: default@hive_foo +#### A masked pattern was here #### +POSTHOOK: query: select hive_foo.foo_name, hive_bar.bar_name, n from hive_foo join hive_bar on hive_foo.foo_id = +hive_bar.foo_id join hive_count on hive_count.bar_id = hive_bar.bar_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hive_bar +POSTHOOK: Input: default@hive_count +POSTHOOK: Input: default@hive_foo +#### A masked pattern was here #### +foo1 bar10 2 diff --git ql/src/test/results/clientpositive/spark/join_literals.q.out ql/src/test/results/clientpositive/spark/join_literals.q.out new file mode 100644 index 0000000..eab2085 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_literals.q.out @@ -0,0 +1,41 @@ +WARNING: Comparing a bigint and a string may result in a loss of precision. 
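The join_literals output below exercises Hive's typed numeric literal suffixes in join conditions: Y for TINYINT, S for SMALLINT, L for BIGINT, and BD for DECIMAL. The WARNING above is emitted because src.key is a STRING, so a.key = 0L compares a string with a bigint; the comparison happens after an implicit numeric conversion, which can lose precision for very large bigint values. A small illustrative query, assuming the standard src test table:

    -- Typed literal suffixes: 0Y -> TINYINT, 0S -> SMALLINT,
    -- 0L -> BIGINT, 0BD -> DECIMAL
    SELECT 0Y, 0S, 0L, 0BD FROM src LIMIT 1;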
+PREHOOK: query: -- Test Joins with a variety of literals in the on clause + +SELECT COUNT(*) FROM src a JOIN src b ON a.key = b.key AND a.key = 0L +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- Test Joins with a variety of literals in the on clause + +SELECT COUNT(*) FROM src a JOIN src b ON a.key = b.key AND a.key = 0L +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +9 +PREHOOK: query: SELECT COUNT(*) FROM src a JOIN src b ON a.key = b.key AND a.key = 0S +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) FROM src a JOIN src b ON a.key = b.key AND a.key = 0S +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +9 +PREHOOK: query: SELECT COUNT(*) FROM src a JOIN src b ON a.key = b.key AND a.key = 0Y +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) FROM src a JOIN src b ON a.key = b.key AND a.key = 0Y +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +9 +PREHOOK: query: SELECT COUNT(*) FROM src a JOIN src b ON a.key = b.key AND a.key = 0BD +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) FROM src a JOIN src b ON a.key = b.key AND a.key = 0BD +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +9 diff --git ql/src/test/results/clientpositive/spark/join_map_ppr.q.out ql/src/test/results/clientpositive/spark/join_map_ppr.q.out new file mode 100644 index 0000000..4ee6b8d --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_map_ppr.q.out @@ -0,0 +1,1105 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_j1 +PREHOOK: query: EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.key = z.key) +WHERE z.ds='2008-04-08' and z.hr=11 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.key = z.key) +WHERE z.ds='2008-04-08' and z.hr=11 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src1 + x + TOK_TABREF + TOK_TABNAME + src + y + = + . + TOK_TABLE_OR_COL + x + key + . + TOK_TABLE_OR_COL + y + key + TOK_TABREF + TOK_TABNAME + srcpart + z + = + . + TOK_TABLE_OR_COL + x + key + . + TOK_TABLE_OR_COL + z + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest_j1 + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + x + y + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + z + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + y + value + TOK_WHERE + and + = + . + TOK_TABLE_OR_COL + z + ds + '2008-04-08' + = + . 
+ TOK_TABLE_OR_COL + z + hr + 11 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 2 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [z] + Map 3 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [y] + Map 4 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [x] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition 
expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + 2 {VALUE._col0} + outputColumnNames: _col0, _col6, _col11 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.key = z.key) +WHERE z.ds='2008-04-08' and z.hr=11 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@dest_j1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.key = z.key) +WHERE z.ds='2008-04-08' and z.hr=11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from dest_j1 +PREHOOK: type: 
QUERY +PREHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +128 val_128 val_128 +128 val_128 val_128 +128 val_128 val_128 +128 val_128 val_128 +128 val_128 val_128 +128 val_128 val_128 +128 val_128 val_128 +128 val_128 val_128 +128 val_128 val_128 +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +150 val_150 val_150 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +224 val_224 val_224 +224 val_224 val_224 +224 val_224 val_224 +224 val_224 val_224 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +66 val_66 val_66 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 +PREHOOK: query: CREATE TABLE src_copy(key int, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_copy +POSTHOOK: query: CREATE TABLE src_copy(key int, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_copy +PREHOOK: query: CREATE TABLE src1_copy(key string, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src1_copy +POSTHOOK: query: CREATE TABLE src1_copy(key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src1_copy +PREHOOK: query: INSERT OVERWRITE TABLE src_copy select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_copy +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE src_copy select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@src +POSTHOOK: Output: default@src_copy +POSTHOOK: Lineage: src_copy.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_copy.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE src1_copy select key, value from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@src1_copy +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE src1_copy select key, value from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src1_copy +POSTHOOK: Lineage: src1_copy.key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src1_copy.value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value +FROM src1_copy x JOIN src_copy y ON (x.key = y.key) +JOIN srcpart z ON (x.key = z.key) +WHERE z.ds='2008-04-08' and z.hr=11 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value +FROM src1_copy x JOIN src_copy y ON (x.key = y.key) +JOIN srcpart z ON (x.key = z.key) +WHERE z.ds='2008-04-08' and z.hr=11 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src1_copy + x + TOK_TABREF + TOK_TABNAME + src_copy + y + = + . + TOK_TABLE_OR_COL + x + key + . + TOK_TABLE_OR_COL + y + key + TOK_TABREF + TOK_TABNAME + srcpart + z + = + . + TOK_TABLE_OR_COL + x + key + . + TOK_TABLE_OR_COL + z + key + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest_j1 + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + x + y + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + z + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + y + value + TOK_WHERE + and + = + . + TOK_TABLE_OR_COL + z + ds + '2008-04-08' + = + . 
+ TOK_TABLE_OR_COL + z + hr + 11 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 2 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [z] + Map 3 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src_copy + 
input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.src_copy + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct src_copy { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.src_copy + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct src_copy { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_copy + name: default.src_copy + Truncated Path -> Alias: + /src_copy [y] + Map 4 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 2 Data size: 216 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src1_copy + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.src1_copy + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct src1_copy { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.src1_copy + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct src1_copy { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1_copy + name: default.src1_copy + Truncated Path -> Alias: + /src1_copy [x] + 
Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col1} + 2 {VALUE._col1} + outputColumnNames: _col0, _col6, _col11 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2125 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2125 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value +FROM src1_copy x JOIN src_copy y ON (x.key = y.key) +JOIN srcpart z ON (x.key = z.key) +WHERE z.ds='2008-04-08' and z.hr=11 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1_copy +PREHOOK: Input: default@src_copy +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@dest_j1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value +FROM src1_copy x JOIN src_copy y ON (x.key = y.key) +JOIN srcpart z ON (x.key = z.key) +WHERE z.ds='2008-04-08' and z.hr=11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1_copy +POSTHOOK: Input: default@src_copy +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: 
dest_j1.key SIMPLE [(src1_copy)x.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src_copy)y.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from dest_j1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +128 val_128 val_128 +128 val_128 val_128 +128 val_128 val_128 +128 val_128 val_128 +128 val_128 val_128 +128 val_128 val_128 +128 val_128 val_128 +128 val_128 val_128 +128 val_128 val_128 +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +150 val_150 val_150 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +224 val_224 val_224 +224 val_224 val_224 +224 val_224 val_224 +224 val_224 val_224 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +66 val_66 val_66 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 val_98 diff --git ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out new file mode 100644 index 0000000..8e924be --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out @@ -0,0 +1,121 @@ +PREHOOK: query: explain +select count(*) from srcpart a join srcpart b on a.key = b.key and a.hr = b.hr join srcpart c on a.hr = c.hr and a.key = c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from srcpart a join srcpart b on a.key = b.key and a.hr = b.hr join srcpart c on a.hr = c.hr and a.key = c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 
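Worth noting before the STAGE PLANS section that follows: because all three join conditions use the identical compound key (key, hr), Hive merges them into a single multi-way Join Operator, so the plan shows one shuffle feeding Reducer 2 ("Inner Join 0 to 1" and "Inner Join 0 to 2") rather than two cascaded join stages. The triggering query, repeated for readability:

    SELECT count(*)
    FROM srcpart a
    JOIN srcpart b ON a.key = b.key AND a.hr = b.hr
    JOIN srcpart c ON a.hr = c.hr AND a.key = c.key;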
+ +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), hr (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), hr (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), hr (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), hr (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), hr (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), hr (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 + 1 + 2 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart a join srcpart b on a.key = b.key and a.hr = b.hr join srcpart c on a.hr = c.hr and a.key = c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart 
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart a join srcpart b on a.key = b.key and a.hr = b.hr join srcpart c on a.hr = c.hr and a.key = c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +42464 diff --git ql/src/test/results/clientpositive/spark/join_merging.q.out ql/src/test/results/clientpositive/spark/join_merging.q.out new file mode 100644 index 0000000..e443ef0 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_merging.q.out @@ -0,0 +1,203 @@ +PREHOOK: query: CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part +POSTHOOK: query: CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part +PREHOOK: query: explain select p1.p_size, p2.p_size +from part p1 left outer join part p2 on p1.p_partkey = p2.p_partkey + right outer join part p3 on p2.p_partkey = p3.p_partkey and + p1.p_size > 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain select p1.p_size, p2.p_size +from part p1 left outer join part p2 on p1.p_partkey = p2.p_partkey + right outer join part p3 on p2.p_partkey = p3.p_partkey and + p1.p_size > 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p_size (type: int) + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (p_size > 10) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value 
expressions: p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {VALUE._col4} + 1 {VALUE._col4} + 2 + outputColumnNames: _col5, _col17 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col5 (type: int), _col17 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select p1.p_size, p2.p_size +from part p1 left outer join part p2 on p1.p_partkey = p2.p_partkey + right outer join part p3 on p2.p_partkey = p3.p_partkey and + p1.p_size > 10 and p1.p_size > p2.p_size + 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain select p1.p_size, p2.p_size +from part p1 left outer join part p2 on p1.p_partkey = p2.p_partkey + right outer join part p3 on p2.p_partkey = p3.p_partkey and + p1.p_size > 10 and p1.p_size > p2.p_size + 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p_size (type: int) + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (p_size > 10) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p_size (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {VALUE._col4} + 1 {VALUE._col4} + 2 + outputColumnNames: _col5, _col17 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (_col5 > (_col17 + 10)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col5 (type: int), _col17 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 
0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/join_nulls.q.out ql/src/test/results/clientpositive/spark/join_nulls.q.out new file mode 100644 index 0000000..184bee9 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_nulls.q.out @@ -0,0 +1,648 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE myinput1(key int, value int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@myinput1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE myinput1(key int, value int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@myinput1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@myinput1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@myinput1 +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +100 100 48 NULL +100 100 NULL 35 +48 NULL 100 100 +48 NULL 48 NULL +48 NULL NULL 35 +NULL 35 100 100 +NULL 35 48 NULL +NULL 35 NULL 35 +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +100 100 48 NULL +100 100 NULL 35 +48 NULL 100 100 +48 NULL 48 NULL +48 NULL NULL 35 +NULL 35 100 100 +NULL 35 48 NULL +NULL 35 NULL 35 +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +100 100 48 NULL +100 100 NULL 35 +48 NULL 100 100 +48 NULL 48 NULL +48 NULL NULL 35 +NULL 35 100 100 +NULL 35 48 NULL +NULL 35 NULL 35 +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### 
+100 100 100 100 +48 NULL 48 NULL +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +NULL 35 NULL 35 +PREHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +48 NULL NULL NULL +NULL 35 NULL NULL +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +48 NULL NULL NULL +NULL 35 NULL 35 +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +48 NULL 48 NULL +NULL 35 NULL NULL +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +48 NULL NULL NULL +NULL 35 NULL NULL +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +NULL NULL 48 NULL +NULL NULL NULL 35 +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +48 NULL 48 NULL +NULL NULL NULL 35 +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### 
+POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +NULL 35 NULL 35 +NULL NULL 48 NULL +PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +NULL NULL 48 NULL +NULL NULL NULL 35 +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +48 NULL NULL NULL +NULL 35 NULL NULL +NULL NULL 48 NULL +NULL NULL NULL 35 +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +48 NULL 48 NULL +NULL 35 NULL NULL +NULL NULL NULL 35 +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +48 NULL NULL NULL +NULL 35 NULL 35 +NULL NULL 48 NULL +PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +48 NULL NULL NULL +NULL 35 NULL NULL +NULL NULL 48 NULL +NULL NULL NULL 35 +PREHOOK: query: SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 100 100 +NULL 35 NULL 35 NULL 35 +NULL NULL NULL NULL 48 NULL +PREHOOK: query: SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 
100 100 +NULL 35 NULL 35 NULL 35 +NULL NULL 48 NULL NULL NULL +PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 100 100 +NULL 35 NULL 35 NULL 35 +NULL NULL NULL NULL 48 NULL +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +100 100 48 NULL +100 100 NULL 35 +48 NULL 100 100 +48 NULL 48 NULL +48 NULL NULL 35 +NULL 35 100 100 +NULL 35 48 NULL +NULL 35 NULL 35 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +48 NULL 48 NULL +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +NULL 35 NULL 35 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +48 NULL 48 NULL +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +NULL 35 NULL 35 
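The join_nulls.q.out listings above also document HiveQL's NULL join semantics: the myinput1 rows (48, NULL) and (NULL, 35) never satisfy a plain equality predicate, because NULL = NULL does not evaluate to true, so they appear only on the preserved side of OUTER JOINs, padded with NULLs. Where NULL keys should match each other, Hive's NULL-safe equality operator <=> can be used instead. A minimal sketch, illustrative only and not part of the golden output, assuming the same myinput1 table loaded above:

-- <=> is NULL-safe: NULL <=> NULL evaluates to true, so the
-- (48, NULL) row pairs with itself here, unlike under
-- ON a.value = b.value in the results above.
SELECT * FROM myinput1 a JOIN myinput1 b ON a.value <=> b.value;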
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +48 NULL NULL NULL +NULL 35 NULL NULL +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +48 NULL 48 NULL +NULL 35 NULL NULL +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +48 NULL NULL NULL +NULL 35 NULL 35 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +NULL NULL 48 NULL +NULL NULL NULL 35 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +48 NULL 48 NULL +NULL NULL NULL 35 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +100 100 100 100 +NULL 35 NULL 35 +NULL NULL 48 NULL +PREHOOK: query: CREATE TABLE smb_input1(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_input1 +POSTHOOK: query: CREATE TABLE smb_input1(key int, value int) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@smb_input1 +PREHOOK: query: CREATE TABLE smb_input2(key int, value int) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_input2 +POSTHOOK: query: CREATE TABLE smb_input2(key int, value int) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_input2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' into table smb_input1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_input1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' into table smb_input1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_input1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in2.txt' into table smb_input1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_input1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in2.txt' into table smb_input1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_input1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' into table smb_input2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_input2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' into table smb_input2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_input2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in2.txt' into table smb_input2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_input2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in2.txt' into table smb_input2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_input2 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +100 100 100 100 +148 NULL 148 NULL +200 200 200 200 +48 NULL 48 NULL +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key AND a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key AND a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +100 100 100 100 +148 NULL 148 NULL +200 200 200 200 +48 NULL 48 NULL +NULL NULL NULL 135 +NULL NULL NULL 35 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: 
default@smb_input1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +100 100 100 100 +148 NULL 148 NULL +200 200 200 200 +48 NULL 48 NULL +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +#### A masked pattern was here #### +100 100 100 100 +148 NULL 148 NULL +200 200 200 200 +48 NULL 48 NULL +NULL 135 NULL NULL +NULL 35 NULL NULL +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input2 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input2 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +148 NULL NULL NULL +200 200 200 200 +48 NULL NULL NULL +NULL 135 NULL NULL +NULL 35 NULL NULL +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input2 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input1 +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input2 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input1 +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +NULL NULL 148 NULL +NULL NULL 48 NULL +NULL NULL NULL 135 +NULL NULL NULL 35 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +NULL 135 NULL 135 +NULL 35 NULL 35 +PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM 
smb_input2 a RIGHT OUTER JOIN smb_input2 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a RIGHT OUTER JOIN smb_input2 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +NULL 135 NULL 135 +NULL 35 NULL 35 +NULL NULL 148 NULL +NULL NULL 48 NULL +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +200 200 200 200 +NULL 135 NULL 135 +NULL 35 NULL 35 +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a LEFT OUTER JOIN smb_input2 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a LEFT OUTER JOIN smb_input2 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_input2 +#### A masked pattern was here #### +100 100 100 100 +148 NULL NULL NULL +200 200 200 200 +48 NULL NULL NULL +NULL 135 NULL 135 +NULL 35 NULL 35 diff --git ql/src/test/results/clientpositive/spark/join_rc.q.out ql/src/test/results/clientpositive/spark/join_rc.q.out new file mode 100644 index 0000000..503d785 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_rc.q.out @@ -0,0 +1,1152 @@ +PREHOOK: query: create table join_rc1(key string, value string) stored as RCFile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@join_rc1 +POSTHOOK: query: create table join_rc1(key string, value string) stored as RCFile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@join_rc1 +PREHOOK: query: create table join_rc2(key string, value string) stored as RCFile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@join_rc2 +POSTHOOK: query: create table join_rc2(key string, value string) stored as RCFile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@join_rc2 +PREHOOK: query: insert overwrite table join_rc1 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@join_rc1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table join_rc1 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@join_rc1 +POSTHOOK: Lineage: join_rc1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: join_rc1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table join_rc2 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@join_rc2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table join_rc2 select * from src 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@join_rc2 +POSTHOOK: Lineage: join_rc2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: join_rc2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain +select join_rc1.key, join_rc2.value +FROM join_rc1 JOIN join_rc2 ON join_rc1.key = join_rc2.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select join_rc1.key, join_rc2.value +FROM join_rc1 JOIN join_rc2 ON join_rc1.key = join_rc2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: join_rc2 + Statistics: Num rows: 26 Data size: 5293 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: join_rc1 + Statistics: Num rows: 52 Data size: 5293 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 26 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Statistics: Num rows: 28 Data size: 2910 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 2910 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 2910 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select join_rc1.key, join_rc2.value +FROM join_rc1 JOIN join_rc2 ON join_rc1.key = join_rc2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@join_rc1 +PREHOOK: Input: default@join_rc2 +#### A masked pattern was here #### +POSTHOOK: query: select join_rc1.key, join_rc2.value +FROM join_rc1 JOIN join_rc2 ON join_rc1.key = join_rc2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@join_rc1 +POSTHOOK: Input: default@join_rc2 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +105 val_105 +11 val_11 
+111 val_111 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +20 val_20 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +201 val_201 +202 val_202 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +214 val_214 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +218 val_218 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +222 val_222 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +224 val_224 +224 val_224 +224 val_224 +224 val_224 
+226 val_226 +228 val_228 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +235 val_235 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +241 val_241 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +244 val_244 +247 val_247 +248 val_248 +249 val_249 +252 val_252 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +257 val_257 +258 val_258 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +260 val_260 +262 val_262 +263 val_263 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +274 val_274 +275 val_275 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +28 val_28 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +283 val_283 +284 val_284 +285 val_285 +286 val_286 +287 val_287 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +289 val_289 +291 val_291 +292 val_292 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +30 val_30 +302 val_302 +305 val_305 +306 val_306 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +308 val_308 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +310 val_310 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +315 val_315 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +323 val_323 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +33 val_33 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +332 val_332 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 val_339 +34 val_34 +341 val_341 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +345 val_345 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +35 val_35 +35 val_35 +35 
val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +351 val_351 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +365 val_365 +366 val_366 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +368 val_368 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +373 val_373 +374 val_374 +375 val_375 +377 val_377 +378 val_378 +379 val_379 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +4 val_4 +400 val_400 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +402 val_402 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +404 val_404 +404 val_404 +404 val_404 +404 val_404 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +407 val_407 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +41 val_41 +411 val_411 +413 val_413 +413 val_413 +413 val_413 +413 val_413 +414 val_414 +414 val_414 +414 val_414 +414 val_414 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +418 val_418 +419 val_419 +42 val_42 +42 val_42 +42 val_42 +42 val_42 +421 val_421 +424 val_424 +424 val_424 +424 val_424 +424 val_424 +427 val_427 +429 val_429 +429 val_429 +429 val_429 +429 val_429 +43 val_43 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +432 val_432 +435 val_435 +436 val_436 +437 val_437 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +439 val_439 +439 val_439 +439 val_439 +439 val_439 +44 val_44 +443 val_443 +444 val_444 +446 val_446 +448 val_448 +449 val_449 +452 val_452 +453 val_453 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +455 val_455 +457 val_457 +458 val_458 +458 val_458 +458 val_458 +458 val_458 +459 val_459 +459 val_459 +459 val_459 +459 val_459 +460 val_460 +462 val_462 +462 val_462 +462 val_462 +462 val_462 +463 val_463 +463 val_463 +463 val_463 +463 val_463 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +467 val_467 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 
val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +47 val_47 +470 val_470 +472 val_472 +475 val_475 +477 val_477 +478 val_478 +478 val_478 +478 val_478 +478 val_478 +479 val_479 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +481 val_481 +482 val_482 +483 val_483 +484 val_484 +485 val_485 +487 val_487 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +490 val_490 +491 val_491 +492 val_492 +492 val_492 +492 val_492 +492 val_492 +493 val_493 +494 val_494 +495 val_495 +496 val_496 +497 val_497 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +51 val_51 +51 val_51 +51 val_51 +51 val_51 +53 val_53 +54 val_54 +57 val_57 +58 val_58 +58 val_58 +58 val_58 +58 val_58 +64 val_64 +65 val_65 +66 val_66 +67 val_67 +67 val_67 +67 val_67 +67 val_67 +69 val_69 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +72 val_72 +72 val_72 +72 val_72 +72 val_72 +74 val_74 +76 val_76 +76 val_76 +76 val_76 +76 val_76 +77 val_77 +78 val_78 +8 val_8 +80 val_80 +82 val_82 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +85 val_85 +86 val_86 +87 val_87 +9 val_9 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +98 val_98 +98 val_98 diff --git ql/src/test/results/clientpositive/spark/join_reorder.q.out ql/src/test/results/clientpositive/spark/join_reorder.q.out new file mode 100644 index 0000000..db01652 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_reorder.q.out @@ -0,0 +1,694 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +POSTHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: 
default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t3 +PREHOOK: query: EXPLAIN FROM T1 a JOIN src c ON c.key+1=a.key +SELECT a.key, a.val, c.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN FROM T1 a JOIN src c ON c.key+1=a.key +SELECT a.key, a.val, c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key + 1) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (key + 1) (type: double) + sort order: + + Map-reduce partition columns: (key + 1) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN FROM T1 a JOIN src c ON c.key+1=a.key +SELECT /*+ STREAMTABLE(a) */ a.key, a.val, c.key +PREHOOK: type: QUERY 
+POSTHOOK: query: EXPLAIN FROM T1 a JOIN src c ON c.key+1=a.key +SELECT /*+ STREAMTABLE(a) */ a.key, a.val, c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key + 1) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (key + 1) (type: double) + sort order: + + Map-reduce partition columns: (key + 1) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM T1 a JOIN src c ON c.key+1=a.key +SELECT a.key, a.val, c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: FROM T1 a JOIN src c ON c.key+1=a.key +SELECT a.key, a.val, c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 11 0 +1 11 0 +1 11 0 +3 13 2 +PREHOOK: query: FROM T1 a JOIN src c ON c.key+1=a.key +SELECT /*+ STREAMTABLE(a) */ a.key, a.val, c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: FROM T1 a JOIN src c ON c.key+1=a.key +SELECT /*+ STREAMTABLE(a) */ a.key, a.val, c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 11 0 +1 11 0 +1 11 0 +3 13 2 +PREHOOK: query: EXPLAIN FROM T1 a + LEFT OUTER JOIN T2 b ON (b.key=a.key) + RIGHT OUTER JOIN T3 c ON (c.val = a.val) +SELECT a.key, b.key, a.val, c.val +PREHOOK: type: 
QUERY +POSTHOOK: query: EXPLAIN FROM T1 a + LEFT OUTER JOIN T2 b ON (b.key=a.key) + RIGHT OUTER JOIN T3 c ON (c.val = a.val) +SELECT a.key, b.key, a.val, c.val +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col4} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col11 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col1 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN FROM T1 a + LEFT OUTER JOIN T2 b ON (b.key=a.key) + RIGHT OUTER JOIN T3 c ON (c.val = a.val) +SELECT /*+ STREAMTABLE(a) */ a.key, b.key, a.val, c.val +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN FROM T1 a + LEFT OUTER JOIN T2 b ON (b.key=a.key) + RIGHT OUTER JOIN T3 c ON (c.val = a.val) +SELECT /*+ STREAMTABLE(a) */ a.key, b.key, a.val, c.val 
+POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col4} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col11 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col1 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM T1 a + LEFT OUTER JOIN T2 b ON (b.key=a.key) + RIGHT OUTER JOIN T3 c ON (c.val = a.val) +SELECT a.key, b.key, a.val, c.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: FROM T1 a + LEFT OUTER JOIN T2 b ON (b.key=a.key) + RIGHT OUTER JOIN T3 c ON (c.val = a.val) +SELECT a.key, b.key, a.val, c.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: 
default@t3 +#### A masked pattern was here #### +2 2 12 12 +NULL NULL NULL 14 +NULL NULL NULL 16 +7 NULL 17 17 +PREHOOK: query: FROM T1 a + LEFT OUTER JOIN T2 b ON (b.key=a.key) + RIGHT OUTER JOIN T3 c ON (c.val = a.val) +SELECT /*+ STREAMTABLE(a) */ a.key, b.key, a.val, c.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: FROM T1 a + LEFT OUTER JOIN T2 b ON (b.key=a.key) + RIGHT OUTER JOIN T3 c ON (c.val = a.val) +SELECT /*+ STREAMTABLE(a) */ a.key, b.key, a.val, c.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +2 2 12 12 +NULL NULL NULL 14 +NULL NULL NULL 16 +7 NULL 17 17 +PREHOOK: query: EXPLAIN FROM UNIQUEJOIN + PRESERVE T1 a (a.key, a.val), + PRESERVE T2 b (b.key, b.val), + PRESERVE T3 c (c.key, c.val) +SELECT a.key, b.key, c.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN FROM UNIQUEJOIN + PRESERVE T1 a (a.key, a.val), + PRESERVE T2 b (b.key, b.val), + PRESERVE T3 c (c.key, c.val) +SELECT a.key, b.key, c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Unique Join0 to 0 + Unique Join0 to 0 + Unique Join0 to 0 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + 2 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col5, _col10 + Statistics: Num rows: 0 Data size: 66 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 66 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 66 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + 
Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN FROM UNIQUEJOIN + PRESERVE T1 a (a.key, a.val), + PRESERVE T2 b (b.key, b.val), + PRESERVE T3 c (c.key, c.val) +SELECT /*+ STREAMTABLE(b) */ a.key, b.key, c.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN FROM UNIQUEJOIN + PRESERVE T1 a (a.key, a.val), + PRESERVE T2 b (b.key, b.val), + PRESERVE T3 c (c.key, c.val) +SELECT /*+ STREAMTABLE(b) */ a.key, b.key, c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Unique Join0 to 0 + Unique Join0 to 0 + Unique Join0 to 0 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + 2 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col5, _col10 + Statistics: Num rows: 0 Data size: 66 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 66 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 66 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM UNIQUEJOIN + PRESERVE T1 a (a.key, a.val), + PRESERVE T2 b (b.key, b.val), + PRESERVE T3 c (c.key, c.val) +SELECT a.key, b.key, c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: FROM UNIQUEJOIN + PRESERVE T1 a (a.key, a.val), + PRESERVE T2 b (b.key, b.val), + PRESERVE T3 c (c.key, c.val) +SELECT a.key, b.key, c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +1 NULL NULL +2 NULL 2 +NULL 2 NULL 
+3 3 NULL +NULL 4 4 +NULL 5 NULL +NULL NULL 6 +7 NULL 7 +8 8 NULL +8 8 NULL +8 NULL NULL +PREHOOK: query: FROM UNIQUEJOIN + PRESERVE T1 a (a.key, a.val), + PRESERVE T2 b (b.key, b.val), + PRESERVE T3 c (c.key, c.val) +SELECT /*+ STREAMTABLE(b) */ a.key, b.key, c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: FROM UNIQUEJOIN + PRESERVE T1 a (a.key, a.val), + PRESERVE T2 b (b.key, b.val), + PRESERVE T3 c (c.key, c.val) +SELECT /*+ STREAMTABLE(b) */ a.key, b.key, c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +1 NULL NULL +2 NULL 2 +NULL 2 NULL +3 3 NULL +NULL 4 4 +NULL 5 NULL +NULL NULL 6 +7 NULL 7 +8 8 NULL +8 8 NULL +8 NULL NULL diff --git ql/src/test/results/clientpositive/spark/join_reorder2.q.out ql/src/test/results/clientpositive/spark/join_reorder2.q.out new file mode 100644 index 0000000..1698575 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_reorder2.q.out @@ -0,0 +1,358 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +POSTHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: CREATE TABLE T4(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T4 +POSTHOOK: query: CREATE TABLE T4(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T4 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t3 +PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/T1.txt' INTO TABLE T4 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t4 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T4 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t4 +PREHOOK: query: EXPLAIN +SELECT /*+ STREAMTABLE(a) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON b.key = c.key + JOIN T4 d ON c.key = d.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT /*+ STREAMTABLE(a) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON b.key = c.key + JOIN T4 d ON c.key = d.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 2 to 3 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + 3 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator 
+ expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ STREAMTABLE(a) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON b.key = c.key + JOIN T4 d ON c.key = d.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +PREHOOK: Input: default@t4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ STREAMTABLE(a) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON b.key = c.key + JOIN T4 d ON c.key = d.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +POSTHOOK: Input: default@t4 +#### A masked pattern was here #### +2 12 2 22 2 12 2 12 +PREHOOK: query: EXPLAIN +SELECT /*+ STREAMTABLE(a) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON a.val = c.val + JOIN T4 d ON a.key + 1 = d.key + 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT /*+ STREAMTABLE(a) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON a.val = c.val + JOIN T4 d ON a.key + 1 = d.key + 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key + 1) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: (key + 1) (type: double) + sort order: + + Map-reduce partition columns: (key + 1) (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and (key + 1) is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: val is not null (type: boolean) + Statistics: Num 
rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((key is not null and val is not null) and (key + 1) is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {VALUE._col6} {VALUE._col10} {VALUE._col11} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col4} {VALUE._col5} + 1 {VALUE._col0} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: (_col0 + 1) (type: double) + sort order: + + Map-reduce partition columns: (_col0 + 1) (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ STREAMTABLE(a) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON a.val = c.val + JOIN T4 d ON a.key + 1 = d.key + 1 +PREHOOK: type: QUERY 
+PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +PREHOOK: Input: default@t4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ STREAMTABLE(a) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON a.val = c.val + JOIN T4 d ON a.key + 1 = d.key + 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +POSTHOOK: Input: default@t4 +#### A masked pattern was here #### +2 12 2 22 2 12 2 12 diff --git ql/src/test/results/clientpositive/spark/join_reorder3.q.out ql/src/test/results/clientpositive/spark/join_reorder3.q.out new file mode 100644 index 0000000..ebcbd0d --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_reorder3.q.out @@ -0,0 +1,358 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +POSTHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: CREATE TABLE T4(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T4 +POSTHOOK: query: CREATE TABLE T4(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T4 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t3 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T4 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t4 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T4 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t4 +PREHOOK: query: EXPLAIN +SELECT /*+ STREAMTABLE(a,c) */ * +FROM T1 a 
JOIN T2 b ON a.key = b.key + JOIN T3 c ON b.key = c.key + JOIN T4 d ON c.key = d.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT /*+ STREAMTABLE(a,c) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON b.key = c.key + JOIN T4 d ON c.key = d.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 2 to 3 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + 3 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output 
Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ STREAMTABLE(a,c) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON b.key = c.key + JOIN T4 d ON c.key = d.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +PREHOOK: Input: default@t4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ STREAMTABLE(a,c) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON b.key = c.key + JOIN T4 d ON c.key = d.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +POSTHOOK: Input: default@t4 +#### A masked pattern was here #### +2 12 2 22 2 12 2 12 +PREHOOK: query: EXPLAIN +SELECT /*+ STREAMTABLE(a,c) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON a.val = c.val + JOIN T4 d ON a.key + 1 = d.key + 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT /*+ STREAMTABLE(a,c) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON a.val = c.val + JOIN T4 d ON a.key + 1 = d.key + 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key + 1) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: (key + 1) (type: double) + sort order: + + Map-reduce partition columns: (key + 1) (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and (key + 1) is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: val is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: a 
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((key is not null and val is not null) and (key + 1) is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {VALUE._col6} {VALUE._col10} {VALUE._col11} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col4} {VALUE._col5} + 1 {VALUE._col0} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: (_col0 + 1) (type: double) + sort order: + + Map-reduce partition columns: (_col0 + 1) (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ STREAMTABLE(a,c) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON a.val = c.val + JOIN T4 d ON a.key + 1 = d.key + 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +PREHOOK: Input: default@t4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ STREAMTABLE(a,c) */ * +FROM T1 a JOIN T2 b ON a.key = b.key + JOIN T3 c ON a.val = c.val + JOIN T4 d ON a.key + 1 = d.key + 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 
+POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +POSTHOOK: Input: default@t4 +#### A masked pattern was here #### +2 12 2 22 2 12 2 12 diff --git ql/src/test/results/clientpositive/spark/join_reorder4.q.out ql/src/test/results/clientpositive/spark/join_reorder4.q.out new file mode 100644 index 0000000..4f9cf2f --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_reorder4.q.out @@ -0,0 +1,348 @@ +PREHOOK: query: CREATE TABLE T1(key1 STRING, val1 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key1 STRING, val1 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: CREATE TABLE T2(key2 STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key2 STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: CREATE TABLE T3(key3 STRING, val3 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +POSTHOOK: query: CREATE TABLE T3(key3 STRING, val3 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t3 +PREHOOK: query: explain select /*+ STREAMTABLE(a) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select /*+ STREAMTABLE(a) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key2 is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key2 (type: string) + sort order: + + Map-reduce partition columns: key2 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE + value expressions: val2 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key3 is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key3 (type: string) + sort order: + + Map-reduce partition columns: key3 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val3 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key1 (type: string) + sort order: + + Map-reduce partition columns: key1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ STREAMTABLE(a) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ STREAMTABLE(a) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +2 12 2 22 2 12 +PREHOOK: query: explain select /*+ STREAMTABLE(b) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select /*+ STREAMTABLE(b) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key2 is not null 
(type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key2 (type: string) + sort order: + + Map-reduce partition columns: key2 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val2 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key3 is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key3 (type: string) + sort order: + + Map-reduce partition columns: key3 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val3 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key1 (type: string) + sort order: + + Map-reduce partition columns: key1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ STREAMTABLE(b) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ STREAMTABLE(b) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +2 12 2 22 2 12 +PREHOOK: query: explain select /*+ STREAMTABLE(c) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select /*+ STREAMTABLE(c) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP 
PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key2 is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key2 (type: string) + sort order: + + Map-reduce partition columns: key2 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val2 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key3 is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key3 (type: string) + sort order: + + Map-reduce partition columns: key3 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val3 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key1 (type: string) + sort order: + + Map-reduce partition columns: key1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ STREAMTABLE(c) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ STREAMTABLE(c) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +2 12 2 22 2 12 diff --git ql/src/test/results/clientpositive/spark/join_star.q.out ql/src/test/results/clientpositive/spark/join_star.q.out new file mode 100644 index 0000000..d666adf --- /dev/null +++ 
ql/src/test/results/clientpositive/spark/join_star.q.out @@ -0,0 +1,864 @@ +PREHOOK: query: create table fact(m1 int, m2 int, d1 int, d2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fact +POSTHOOK: query: create table fact(m1 int, m2 int, d1 int, d2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fact +PREHOOK: query: create table dim1(f1 int, f2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dim1 +POSTHOOK: query: create table dim1(f1 int, f2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dim1 +PREHOOK: query: create table dim2(f3 int, f4 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dim2 +POSTHOOK: query: create table dim2(f3 int, f4 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dim2 +PREHOOK: query: create table dim3(f5 int, f6 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dim3 +POSTHOOK: query: create table dim3(f5 int, f6 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dim3 +PREHOOK: query: create table dim4(f7 int, f8 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dim4 +POSTHOOK: query: create table dim4(f7 int, f8 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dim4 +PREHOOK: query: create table dim5(f9 int, f10 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dim5 +POSTHOOK: query: create table dim5(f9 int, f10 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dim5 +PREHOOK: query: create table dim6(f11 int, f12 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dim6 +POSTHOOK: query: create table dim6(f11 int, f12 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dim6 +PREHOOK: query: create table dim7(f13 int, f14 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dim7 +POSTHOOK: query: create table dim7(f13 int, f14 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dim7 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fact-data.txt' INTO TABLE fact +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fact +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fact-data.txt' INTO TABLE fact +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fact +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dim-data.txt' INTO TABLE dim1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@dim1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dim-data.txt' INTO TABLE dim1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@dim1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dim-data.txt' INTO TABLE dim2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@dim2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dim-data.txt' INTO TABLE dim2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@dim2 +PREHOOK: 
query: LOAD DATA LOCAL INPATH '../../data/files/dim-data.txt' INTO TABLE dim3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@dim3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dim-data.txt' INTO TABLE dim3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@dim3 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dim-data.txt' INTO TABLE dim4 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@dim4 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dim-data.txt' INTO TABLE dim4 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@dim4 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dim-data.txt' INTO TABLE dim5 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@dim5 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dim-data.txt' INTO TABLE dim5 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@dim5 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dim-data.txt' INTO TABLE dim6 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@dim6 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dim-data.txt' INTO TABLE dim6 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@dim6 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dim-data.txt' INTO TABLE dim7 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@dim7 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dim-data.txt' INTO TABLE dim7 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@dim7 +PREHOOK: query: explain select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dim1 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: f1 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: f1 (type: int) + sort order: + + Map-reduce partition columns: f1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: f2 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: fact + Statistics: Num rows: 8 Data size: 98 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: d1 (type: int) + sort order: + + Map-reduce partition columns: d1 (type: int) + Statistics: Num rows: 4 Data size: 49 Basic stats: COMPLETE Column stats: NONE + value expressions: m1 (type: int), m2 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col8 + Statistics: Num rows: 4 Data size: 53 
Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dim1 +PREHOOK: Input: default@fact +#### A masked pattern was here #### +POSTHOOK: query: select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dim1 +POSTHOOK: Input: default@fact +#### A masked pattern was here #### +1 2 1 +11 12 1 +21 22 1 +31 32 1 +41 42 1 +51 52 3 +61 62 3 +71 72 3 +81 82 3 +91 92 3 +PREHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dim2 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: f3 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: f3 (type: int) + sort order: + + Map-reduce partition columns: f3 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: f4 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: dim1 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: f1 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: f1 (type: int) + sort order: + + Map-reduce partition columns: f1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: f2 (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: fact + Statistics: Num rows: 6 Data size: 98 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d1 is not null and d2 is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: d1 (type: int) + sort order: + + Map-reduce partition columns: d1 (type: int) + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: m1 (type: int), m2 (type: int), d2 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col7} + 1 
{VALUE._col0} + outputColumnNames: _col0, _col1, _col8, _col13 + Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3, _col8 + Statistics: Num rows: 2 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 2 Data size: 35 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3 +PREHOOK: type: QUERY +PREHOOK: Input: default@dim1 +PREHOOK: Input: default@dim2 +PREHOOK: Input: default@fact +#### A masked pattern was here #### +POSTHOOK: query: select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dim1 +POSTHOOK: Input: default@dim2 +POSTHOOK: Input: default@fact +#### A masked pattern was here #### +1 2 1 2 +11 12 1 2 +21 22 1 2 +31 32 1 2 +41 42 1 2 +51 52 3 4 +61 62 3 4 +71 72 3 4 +81 82 3 4 +91 92 3 4 +PREHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1= dim1.f1 join dim2 on dim1.f2 = dim2.f3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1= dim1.f1 join dim2 on dim1.f2 = dim2.f3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dim2 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: f3 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: f3 (type: int) + sort order: + + Map-reduce partition columns: f3 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: f4 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: dim1 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (f1 is not null and f2 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: f1 (type: int) + sort order: + + 
Map-reduce partition columns: f1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: f2 (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: fact + Statistics: Num rows: 8 Data size: 98 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: d1 (type: int) + sort order: + + Map-reduce partition columns: d1 (type: int) + Statistics: Num rows: 4 Data size: 49 Basic stats: COMPLETE Column stats: NONE + value expressions: m1 (type: int), m2 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col8, _col13 + Statistics: Num rows: 4 Data size: 58 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 58 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 58 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col8 + Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col8 (type: int) + sort order: + + Map-reduce partition columns: _col8 (type: int) + Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select m1, m2, f2, f4 from fact join dim1 on fact.d1= dim1.f1 join dim2 on dim1.f2 = dim2.f3 +PREHOOK: type: QUERY +PREHOOK: Input: default@dim1 +PREHOOK: Input: default@dim2 +PREHOOK: Input: default@fact +#### A masked pattern was here #### +POSTHOOK: query: select m1, m2, f2, f4 from fact join dim1 on fact.d1= dim1.f1 join dim2 on dim1.f2 = dim2.f3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dim1 +POSTHOOK: Input: default@dim2 +POSTHOOK: Input: default@fact +#### A masked pattern was here #### +1 2 1 1 +11 12 1 1 +21 22 1 1 +31 32 1 1 +41 42 1 1 +51 52 3 3 +61 62 3 3 +71 72 3 3 +81 82 3 3 +91 92 3 3 +PREHOOK: query: explain select m1, m2, f2, f4 from fact Left outer join dim1 on fact.d1= dim1.f1 Left outer join dim2 on dim1.f2 = dim2.f3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select m1, m2, f2, f4 from fact Left outer join dim1 on fact.d1= dim1.f1 Left outer join dim2 on dim1.f2 = dim2.f3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map 
Operator Tree: + TableScan + alias: dim2 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: f3 (type: int) + sort order: + + Map-reduce partition columns: f3 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: f4 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: dim1 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: f1 (type: int) + sort order: + + Map-reduce partition columns: f1 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: f2 (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: fact + Statistics: Num rows: 8 Data size: 98 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: d1 (type: int) + sort order: + + Map-reduce partition columns: d1 (type: int) + Statistics: Num rows: 8 Data size: 98 Basic stats: COMPLETE Column stats: NONE + value expressions: m1 (type: int), m2 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col8, _col13 + Statistics: Num rows: 8 Data size: 117 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 117 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 117 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col8 + Statistics: Num rows: 8 Data size: 107 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col8 (type: int) + sort order: + + Map-reduce partition columns: _col8 (type: int) + Statistics: Num rows: 8 Data size: 107 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select m1, m2, f2, f4 from fact Left outer join dim1 on fact.d1= dim1.f1 Left outer join dim2 on dim1.f2 = dim2.f3 +PREHOOK: type: QUERY +PREHOOK: Input: default@dim1 +PREHOOK: Input: default@dim2 +PREHOOK: Input: default@fact +#### A masked pattern was here #### +POSTHOOK: query: select m1, m2, f2, f4 from fact Left outer join dim1 on fact.d1= dim1.f1 Left outer join dim2 on dim1.f2 = dim2.f3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dim1 +POSTHOOK: Input: default@dim2 +POSTHOOK: Input: default@fact +#### A masked pattern was here #### +1 2 1 1 +11 12 1 1 +21 22 1 1 +31 32 1 1 +41 42 1 1 +51 52 3 3 +61 62 3 3 +71 72 3 3 +81 82 3 3 +91 92 3 3 +PREHOOK: query: explain Select m1, m2, f2, f4, f6, f8, f10, f12, f14 + from fact + Left outer join dim1 on fact.d1= dim1.f1 + Left outer join dim2 on dim1.f2 = dim2.f3 + Left outer Join dim3 on fact.d2= dim3.f5 + Left outer 
Join dim4 on dim3.f6= dim4.f7 + Left outer join dim5 on dim4.f8= dim5.f9 + Left outer Join dim6 on dim3.f6= dim6.f11 + Left outer Join dim7 on dim6.f12 = dim7.f13 +PREHOOK: type: QUERY +POSTHOOK: query: explain Select m1, m2, f2, f4, f6, f8, f10, f12, f14 + from fact + Left outer join dim1 on fact.d1= dim1.f1 + Left outer join dim2 on dim1.f2 = dim2.f3 + Left outer Join dim3 on fact.d2= dim3.f5 + Left outer Join dim4 on dim3.f6= dim4.f7 + Left outer join dim5 on dim4.f8= dim5.f9 + Left outer Join dim6 on dim3.f6= dim6.f11 + Left outer Join dim7 on dim6.f12 = dim7.f13 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 7 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 12 (GROUP PARTITION-LEVEL SORT, 1), Map 14 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 13 (GROUP PARTITION-LEVEL SORT, 1), Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 11 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 7 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 9 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 9 <- Map 10 (GROUP PARTITION-LEVEL SORT, 1), Map 8 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dim3 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: f5 (type: int) + sort order: + + Map-reduce partition columns: f5 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: f6 (type: int) + Map 10 + Map Operator Tree: + TableScan + alias: fact + Statistics: Num rows: 6 Data size: 98 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: d1 (type: int) + sort order: + + Map-reduce partition columns: d1 (type: int) + Statistics: Num rows: 6 Data size: 98 Basic stats: COMPLETE Column stats: NONE + value expressions: m1 (type: int), m2 (type: int), d2 (type: int) + Map 11 + Map Operator Tree: + TableScan + alias: dim7 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: f13 (type: int) + sort order: + + Map-reduce partition columns: f13 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: f14 (type: int) + Map 12 + Map Operator Tree: + TableScan + alias: dim6 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: f11 (type: int) + sort order: + + Map-reduce partition columns: f11 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: f12 (type: int) + Map 13 + Map Operator Tree: + TableScan + alias: dim5 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: f9 (type: int) + sort order: + + Map-reduce partition columns: f9 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: f10 (type: int) + Map 14 + Map Operator Tree: + TableScan + alias: dim4 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: f7 (type: int) + sort order: + + Map-reduce partition 
columns: f7 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: f8 (type: int) + Map 6 + Map Operator Tree: + TableScan + alias: dim2 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: f3 (type: int) + sort order: + + Map-reduce partition columns: f3 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: f4 (type: int) + Map 8 + Map Operator Tree: + TableScan + alias: dim1 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: f1 (type: int) + sort order: + + Map-reduce partition columns: f1 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: f2 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col7} {VALUE._col12} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col8, _col13, _col18 + Statistics: Num rows: 6 Data size: 128 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col18 (type: int) + sort order: + + Map-reduce partition columns: _col18 (type: int) + Statistics: Num rows: 6 Data size: 128 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col8} {VALUE._col13} {KEY.reducesinkkey0} + 1 {VALUE._col0} + 2 {VALUE._col0} + outputColumnNames: _col0, _col1, _col8, _col13, _col18, _col23, _col28 + Statistics: Num rows: 13 Data size: 281 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col23 (type: int) + sort order: + + Map-reduce partition columns: _col23 (type: int) + Statistics: Num rows: 13 Data size: 281 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int), _col18 (type: int), _col28 (type: int) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col8} {VALUE._col13} {VALUE._col18} {KEY.reducesinkkey0} {VALUE._col27} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col8, _col13, _col18, _col23, _col28, _col33 + Statistics: Num rows: 14 Data size: 309 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col28 (type: int) + sort order: + + Map-reduce partition columns: _col28 (type: int) + Statistics: Num rows: 14 Data size: 309 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int), _col18 (type: int), _col23 (type: int), _col33 (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col8} {VALUE._col13} {VALUE._col18} {VALUE._col23} {KEY.reducesinkkey0} {VALUE._col32} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col8, _col13, _col18, _col23, _col28, _col33, _col38 + Statistics: Num rows: 15 Data size: 339 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 
_col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int), _col18 (type: int), _col23 (type: int), _col33 (type: int), _col28 (type: int), _col38 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 15 Data size: 339 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 339 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col3} {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3, _col8, _col13 + Statistics: Num rows: 6 Data size: 117 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 6 Data size: 117 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3, _col8 + Statistics: Num rows: 6 Data size: 107 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col8 (type: int) + sort order: + + Map-reduce partition columns: _col8 (type: int) + Statistics: Num rows: 6 Data size: 107 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: Select m1, m2, f2, f4, f6, f8, f10, f12, f14 + from fact + Left outer join dim1 on fact.d1= dim1.f1 + Left outer join dim2 on dim1.f2 = dim2.f3 + Left outer Join dim3 on fact.d2= dim3.f5 + Left outer Join dim4 on dim3.f6= dim4.f7 + Left outer join dim5 on dim4.f8= dim5.f9 + Left outer Join dim6 on dim3.f6= dim6.f11 + Left outer Join dim7 on dim6.f12 = dim7.f13 +PREHOOK: type: QUERY +PREHOOK: Input: default@dim1 +PREHOOK: Input: default@dim2 +PREHOOK: Input: default@dim3 +PREHOOK: Input: default@dim4 +PREHOOK: Input: default@dim5 +PREHOOK: Input: default@dim6 +PREHOOK: Input: default@dim7 +PREHOOK: Input: default@fact +#### A masked pattern was here #### +POSTHOOK: query: Select m1, m2, f2, f4, f6, f8, f10, f12, f14 + from fact + Left outer join dim1 on fact.d1= dim1.f1 + Left outer join dim2 on dim1.f2 = dim2.f3 + Left outer Join dim3 on fact.d2= dim3.f5 + Left outer Join dim4 on dim3.f6= dim4.f7 + Left outer join dim5 on dim4.f8= dim5.f9 + Left outer Join dim6 on dim3.f6= dim6.f11 + Left outer Join dim7 on dim6.f12 = dim7.f13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dim1 +POSTHOOK: Input: default@dim2 +POSTHOOK: Input: default@dim3 +POSTHOOK: Input: default@dim4 +POSTHOOK: Input: default@dim5 +POSTHOOK: Input: default@dim6 +POSTHOOK: Input: default@dim7 +POSTHOOK: Input: default@fact +#### A masked pattern was here #### +1 2 1 1 2 2 2 2 2 +11 12 1 1 2 2 2 2 2 +21 22 1 1 2 2 2 2 2 +31 32 1 1 2 2 2 2 2 +41 42 1 1 2 2 2 2 2 +51 52 3 3 4 4 4 4 4 +61 62 3 3 4 4 4 4 4 +71 72 3 3 4 4 4 4 4 +81 82 3 3 4 4 4 4 4 +91 
92 3 3 4 4 4 4 4 diff --git ql/src/test/results/clientpositive/spark/join_thrift.q.out ql/src/test/results/clientpositive/spark/join_thrift.q.out new file mode 100644 index 0000000..38ffdd8 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_thrift.q.out @@ -0,0 +1,119 @@ +PREHOOK: query: DESCRIBE src_thrift +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@src_thrift +POSTHOOK: query: DESCRIBE src_thrift +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@src_thrift +aint int from deserializer +astring string from deserializer +lint array<int> from deserializer +lstring array<string> from deserializer +lintstring array<struct<myint:int,mystring:string,underscore_int:int>> from deserializer +mstringstring map<string,string> from deserializer +attributes map<string,map<string,map<string,uniontype<int,bigint,string,double,boolean,array<string>,map<string,string>>>>> from deserializer +unionfield1 uniontype<int,bigint,string,double,boolean,array<string>,map<string,string>> from deserializer +unionfield2 uniontype<int,bigint,string,double,boolean,array<string>,map<string,string>> from deserializer +unionfield3 uniontype<int,bigint,string,double,boolean,array<string>,map<string,string>> from deserializer +PREHOOK: query: EXPLAIN +SELECT s1.aint, s2.lintstring +FROM src_thrift s1 +JOIN src_thrift s2 +ON s1.aint = s2.aint +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT s1.aint, s2.lintstring +FROM src_thrift s1 +JOIN src_thrift s2 +ON s1.aint = s2.aint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: s2 + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: aint is not null (type: boolean) + Statistics: Num rows: 6 Data size: 1674 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: aint (type: int) + sort order: + + Map-reduce partition columns: aint (type: int) + Statistics: Num rows: 6 Data size: 1674 Basic stats: COMPLETE Column stats: NONE + value expressions: lintstring (type: array<struct<myint:int,mystring:string,underscore_int:int>>) + Map 3 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: aint is not null (type: boolean) + Statistics: Num rows: 6 Data size: 1674 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: aint (type: int) + sort order: + + Map-reduce partition columns: aint (type: int) + Statistics: Num rows: 6 Data size: 1674 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col3} + outputColumnNames: _col0, _col17 + Statistics: Num rows: 6 Data size: 1841 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col17 (type: array<struct<myint:int,mystring:string,underscore_int:int>>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1841 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1841 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT s1.aint, s2.lintstring +FROM src_thrift s1 +JOIN src_thrift s2 +ON s1.aint = s2.aint +PREHOOK: type: QUERY +PREHOOK: Input: default@src_thrift +#### A masked pattern was here #### +POSTHOOK: query: 
SELECT s1.aint, s2.lintstring +FROM src_thrift s1 +JOIN src_thrift s2 +ON s1.aint = s2.aint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_thrift +#### A masked pattern was here #### +-1952710710 [{"myint":25,"mystring":"125","underscore_int":5}] +-1461153973 [{"myint":49,"mystring":"343","underscore_int":7}] +-751827638 [{"myint":4,"mystring":"8","underscore_int":2}] +-734328909 [{"myint":16,"mystring":"64","underscore_int":4}] +0 NULL +336964413 [{"myint":81,"mystring":"729","underscore_int":9}] +465985200 [{"myint":1,"mystring":"1","underscore_int":1}] +477111222 [{"myint":9,"mystring":"27","underscore_int":3}] +1244525190 [{"myint":36,"mystring":"216","underscore_int":6}] +1638581578 [{"myint":64,"mystring":"512","underscore_int":8}] +1712634731 [{"myint":0,"mystring":"0","underscore_int":0}] diff --git ql/src/test/results/clientpositive/spark/join_vc.q.out ql/src/test/results/clientpositive/spark/join_vc.q.out new file mode 100644 index 0000000..6e34ef3 --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_vc.q.out @@ -0,0 +1,129 @@ +PREHOOK: query: -- see HIVE-4033 earlier a flag named hasVC was not initialized correctly in MapOperator.java, resulting in NPE for following query. order by and limit in the query is not relevant, problem would be evident even without those. They are there to keep .q.out file small and sorted. + +explain select t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value from src t1 join src t2 on t1.key = t2.key join src t3 on t2.value = t3.value order by t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value limit 3 +PREHOOK: type: QUERY +POSTHOOK: query: -- see HIVE-4033 earlier a flag named hasVC was not initialized correctly in MapOperator.java, resulting in NPE for following query. order by and limit in the query is not relevant, problem would be evident even without those. They are there to keep .q.out file small and sorted. 
+ +explain select t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value from src t1 join src t2 on t1.key = t2.key join src t3 on t2.value = t3.value order by t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value limit 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} + outputColumnNames: _col10, _col11, _col12 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: bigint), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: string) + sort order: +++ + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE 
Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {VALUE._col0} + outputColumnNames: _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: 3 + Processor Tree: + ListSink + +PREHOOK: query: select t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value from src t1 join src t2 on t1.key = t2.key join src t3 on t2.value = t3.value order by t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value limit 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value from src t1 join src t2 on t1.key = t2.key join src t3 on t2.value = t3.value order by t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value limit 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 238 val_238 +0 238 val_238 +0 238 val_238 diff --git ql/src/test/results/clientpositive/spark/join_view.q.out ql/src/test/results/clientpositive/spark/join_view.q.out new file mode 100644 index 0000000..ef7a31d --- /dev/null +++ ql/src/test/results/clientpositive/spark/join_view.q.out @@ -0,0 +1,109 @@ +PREHOOK: query: drop table invites +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table invites +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table invites2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table invites2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table invites (foo int, bar string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@invites +POSTHOOK: query: create table invites (foo int, bar string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@invites +PREHOOK: query: create table invites2 (foo int, bar string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@invites2 +POSTHOOK: query: create table invites2 (foo int, bar string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@invites2 +PREHOOK: query: -- test join views: see HIVE-1989 + +create view v as select invites.bar, invites2.foo, invites2.ds from invites join invites2 on invites.ds=invites2.ds +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@invites +PREHOOK: Input: default@invites2 +PREHOOK: Output: database:default +PREHOOK: Output: default@v +POSTHOOK: query: -- test join views: see HIVE-1989 + +create view v as select invites.bar, invites2.foo, invites2.ds from invites join invites2 on invites.ds=invites2.ds +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@invites +POSTHOOK: Input: default@invites2 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@v +PREHOOK: query: explain select * from v where ds='2011-09-01' +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from v where ds='2011-09-01' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root 
stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map 3 + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col1} + 1 {VALUE._col0} + outputColumnNames: _col1, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col6 (type: int), '2011-09-01' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop view v +PREHOOK: type: DROPVIEW +PREHOOK: Input: default@v +PREHOOK: Output: default@v +POSTHOOK: query: drop view v +POSTHOOK: type: DROPVIEW +POSTHOOK: Input: default@v +POSTHOOK: Output: default@v +PREHOOK: query: drop table invites +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@invites +PREHOOK: Output: default@invites +POSTHOOK: query: drop table invites +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@invites +POSTHOOK: Output: default@invites +PREHOOK: query: drop table invites2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@invites2 +PREHOOK: Output: default@invites2 +POSTHOOK: query: drop table invites2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@invites2 +POSTHOOK: Output: default@invites2 diff --git ql/src/test/results/clientpositive/spark/leftsemijoin.q.out ql/src/test/results/clientpositive/spark/leftsemijoin.q.out new file mode 100644 index 0000000..11f0bb0 --- /dev/null +++ ql/src/test/results/clientpositive/spark/leftsemijoin.q.out @@ -0,0 +1,114 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +drop table sales +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- SORT_QUERY_RESULTS + +drop table sales +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table things +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table things +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE sales (name STRING, id INT) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@sales +POSTHOOK: query: CREATE TABLE sales (name STRING, id INT) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sales +PREHOOK: query: CREATE TABLE things (id INT, name STRING) partitioned by (ds string) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@things +POSTHOOK: query: CREATE TABLE things (id INT, name STRING) partitioned by (ds string) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@things +PREHOOK: query: load data local inpath '../../data/files/sales.txt' INTO TABLE sales +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@sales +POSTHOOK: query: load data local inpath 
'../../data/files/sales.txt' INTO TABLE sales +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@sales +PREHOOK: query: load data local inpath '../../data/files/things.txt' INTO TABLE things partition(ds='2011-10-23') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@things +POSTHOOK: query: load data local inpath '../../data/files/things.txt' INTO TABLE things partition(ds='2011-10-23') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@things +POSTHOOK: Output: default@things@ds=2011-10-23 +PREHOOK: query: load data local inpath '../../data/files/things2.txt' INTO TABLE things partition(ds='2011-10-24') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@things +POSTHOOK: query: load data local inpath '../../data/files/things2.txt' INTO TABLE things partition(ds='2011-10-24') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@things +POSTHOOK: Output: default@things@ds=2011-10-24 +PREHOOK: query: SELECT name,id FROM sales +PREHOOK: type: QUERY +PREHOOK: Input: default@sales +#### A masked pattern was here #### +POSTHOOK: query: SELECT name,id FROM sales +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sales +#### A masked pattern was here #### +Hank 2 +Joe 2 +PREHOOK: query: SELECT id,name FROM things +PREHOOK: type: QUERY +PREHOOK: Input: default@things +PREHOOK: Input: default@things@ds=2011-10-23 +PREHOOK: Input: default@things@ds=2011-10-24 +#### A masked pattern was here #### +POSTHOOK: query: SELECT id,name FROM things +POSTHOOK: type: QUERY +POSTHOOK: Input: default@things +POSTHOOK: Input: default@things@ds=2011-10-23 +POSTHOOK: Input: default@things@ds=2011-10-24 +#### A masked pattern was here #### +2 Tie +2 Tie +PREHOOK: query: SELECT name,id FROM sales LEFT SEMI JOIN things ON (sales.id = things.id) +PREHOOK: type: QUERY +PREHOOK: Input: default@sales +PREHOOK: Input: default@things +PREHOOK: Input: default@things@ds=2011-10-23 +PREHOOK: Input: default@things@ds=2011-10-24 +#### A masked pattern was here #### +POSTHOOK: query: SELECT name,id FROM sales LEFT SEMI JOIN things ON (sales.id = things.id) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sales +POSTHOOK: Input: default@things +POSTHOOK: Input: default@things@ds=2011-10-23 +POSTHOOK: Input: default@things@ds=2011-10-24 +#### A masked pattern was here #### +Hank 2 +Joe 2 +PREHOOK: query: drop table sales +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@sales +PREHOOK: Output: default@sales +POSTHOOK: query: drop table sales +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@sales +POSTHOOK: Output: default@sales +PREHOOK: query: drop table things +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@things +PREHOOK: Output: default@things +POSTHOOK: query: drop table things +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@things +POSTHOOK: Output: default@things diff --git ql/src/test/results/clientpositive/spark/leftsemijoin_mr.q.out ql/src/test/results/clientpositive/spark/leftsemijoin_mr.q.out new file mode 100644 index 0000000..fe63057 --- /dev/null +++ ql/src/test/results/clientpositive/spark/leftsemijoin_mr.q.out @@ -0,0 +1,98 @@ +PREHOOK: query: CREATE TABLE T1(key INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/leftsemijoin_mr_t1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/leftsemijoin_mr_t1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/leftsemijoin_mr_t2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/leftsemijoin_mr_t2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: -- Run this query using TestMinimrCliDriver + +SELECT * FROM T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: -- Run this query using TestMinimrCliDriver + +SELECT * FROM T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 +1 +PREHOOK: query: SELECT * FROM T2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM T2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +PREHOOK: query: SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +1 +1 +PREHOOK: query: SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +1 +1 diff --git ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out new file mode 100644 index 0000000..d4cd429 --- /dev/null +++ ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out @@ -0,0 +1,1725 @@ +PREHOOK: query: EXPLAIN EXTENDED + FROM + src a + LEFT OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED + FROM + src a + LEFT OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_LEFTOUTERJOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_TABREF + TOK_TABNAME + srcpart + b + AND + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + b + ds + '2008-04-08' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns 
ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num 
rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col5 > 15) and (_col5 < 25)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + LEFT OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + LEFT OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 +PREHOOK: query: EXPLAIN EXTENDED + FROM + srcpart a + LEFT OUTER JOIN + src b + ON (a.key = b.key AND a.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED + FROM + srcpart a + LEFT OUTER JOIN + src b + ON (a.key = b.key AND a.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_LEFTOUTERJOIN + TOK_TABREF + TOK_TABNAME + srcpart + a + TOK_TABREF + TOK_TABNAME + src + b + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + ds + '2008-04-08' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + > + . 
+ TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string), ds (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 
2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 
+#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [a] + /srcpart/ds=2008-04-08/hr=12 [a] + /srcpart/ds=2008-04-09/hr=11 [a] + /srcpart/ds=2008-04-09/hr=12 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + filter mappings: + 0 [1, 1] + filter predicates: + 0 {(VALUE._col1 = '2008-04-08')} + 1 + outputColumnNames: _col0, _col1, _col7, _col8 + Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col7 > 15) and (_col7 < 25)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE 
+#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + srcpart a + LEFT OUTER JOIN + src b + ON (a.key = b.key AND a.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM + srcpart a + LEFT OUTER JOIN + src b + ON (a.key = b.key AND a.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 +PREHOOK: query: EXPLAIN EXTENDED + FROM + src a + LEFT OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED + FROM + src a + LEFT OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_LEFTOUTERJOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_TABREF + TOK_TABNAME + srcpart + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + = + . 
+ TOK_TABLE_OR_COL + b + ds + '2008-04-08' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string), ds (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + 
properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + /srcpart/ds=2008-04-09/hr=11 [b] + /srcpart/ds=2008-04-09/hr=12 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col5, _col6, _col7 + Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (((_col5 > 15) and (_col5 < 25)) and (_col7 = '2008-04-08')) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + LEFT OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + LEFT OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 +PREHOOK: query: EXPLAIN EXTENDED + FROM + srcpart a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND a.ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED + FROM + srcpart a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND a.ds = '2008-04-08' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_LEFTOUTERJOIN + TOK_TABREF + TOK_TABNAME + srcpart + a + TOK_TABREF + TOK_TABNAME + src + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + = + . 
+ TOK_TABLE_OR_COL + a + ds + '2008-04-08' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + 
columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [a] + /srcpart/ds=2008-04-08/hr=12 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col7, _col8 + Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col7 > 15) and (_col7 < 25)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: 
NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + srcpart a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND a.ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM + srcpart a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND a.ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 diff --git ql/src/test/results/clientpositive/spark/mapjoin_addjar.q.out ql/src/test/results/clientpositive/spark/mapjoin_addjar.q.out new file mode 100644 index 0000000..c107818 --- /dev/null +++ ql/src/test/results/clientpositive/spark/mapjoin_addjar.q.out @@ -0,0 +1,37 @@ +PREHOOK: query: CREATE TABLE t1 (a string, b string) +ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: CREATE TABLE t1 (a string, b string) +ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/sample.json" INTO TABLE t1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/sample.json" INTO TABLE t1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: select * from src join t1 on src.key =t1.a +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select * from src join t1 on src.key =t1.a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +2 val_2 2 blah +PREHOOK: query: drop table t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: drop table t1 +POSTHOOK: type: 
DROPTABLE +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 diff --git ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out new file mode 100644 index 0000000..5e0a9e8 --- /dev/null +++ ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out @@ -0,0 +1,177 @@ +PREHOOK: query: CREATE TABLE over1k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k +POSTHOOK: query: CREATE TABLE over1k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over1k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over1k +PREHOOK: query: CREATE TABLE t1(dec decimal(4,2)) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: CREATE TABLE t1(dec decimal(4,2)) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: INSERT INTO TABLE t1 select dec from over1k +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k +PREHOOK: Output: default@t1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT INTO TABLE t1 select dec from over1k +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k +POSTHOOK: Output: default@t1 +POSTHOOK: Lineage: t1.dec SIMPLE [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +PREHOOK: query: CREATE TABLE t2(dec decimal(4,0)) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: CREATE TABLE t2(dec decimal(4,0)) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: INSERT INTO TABLE t2 select dec from over1k +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k +PREHOOK: Output: default@t2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT INTO TABLE t2 select dec from over1k +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.dec EXPRESSION [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +PREHOOK: query: explain +select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + 
Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 12 Data size: 1359 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: dec is not null (type: boolean) + Statistics: Num rows: 6 Data size: 679 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: dec (type: decimal(4,0)) + sort order: + + Map-reduce partition columns: dec (type: decimal(4,0)) + Statistics: Num rows: 6 Data size: 679 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 21 Data size: 2422 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: dec is not null (type: boolean) + Statistics: Num rows: 11 Data size: 1268 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: dec (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: dec (type: decimal(4,2)) + Statistics: Num rows: 11 Data size: 1268 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col4 + Statistics: Num rows: 12 Data size: 1394 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: decimal(4,2)), _col4 (type: decimal(4,0)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1394 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1394 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +PREHOOK: query: select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +PREHOOK: query: select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out new file mode 100644 index 0000000..9f66974 --- /dev/null +++ ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out @@ -0,0 +1,528 @@ +PREHOOK: query: explain +FROM srcpart c +JOIN srcpart d +ON ( c.key=d.key 
AND c.ds='2008-04-08' AND d.ds='2008-04-08') +SELECT /*+ MAPJOIN(d) */ DISTINCT c.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM srcpart c +JOIN srcpart d +ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') +SELECT /*+ MAPJOIN(d) */ DISTINCT c.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: partials + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 4 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM srcpart c +JOIN srcpart d +ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') +SELECT /*+ MAPJOIN(d) */ DISTINCT c.value as value order by value limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM srcpart c +JOIN srcpart d +ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') +SELECT /*+ MAPJOIN(d) */ DISTINCT c.value as value order by value limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +val_0 +val_10 +val_100 +val_103 +val_104 +val_105 +val_11 +val_111 +val_113 +val_114 +PREHOOK: query: explain +FROM srcpart c +JOIN srcpart d +ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') +SELECT /*+ MAPJOIN(d) */ DISTINCT c.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM srcpart c +JOIN srcpart d +ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') +SELECT /*+ MAPJOIN(d) */ DISTINCT c.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 
Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM srcpart c +JOIN srcpart d +ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') +SELECT /*+ MAPJOIN(d) */ DISTINCT c.value as value order by value limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM srcpart c +JOIN srcpart d +ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') +SELECT /*+ MAPJOIN(d) */ DISTINCT c.value as value order by value limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +val_0 +val_10 +val_100 +val_103 +val_104 +val_105 +val_11 +val_111 +val_113 +val_114 +PREHOOK: query: explain +FROM srcpart c +JOIN srcpart d +ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') +SELECT /*+ MAPJOIN(d) */ DISTINCT c.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM srcpart c +JOIN srcpart d +ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') +SELECT /*+ MAPJOIN(d) */ DISTINCT c.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 
to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 4 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM srcpart c +JOIN srcpart d +ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') +SELECT /*+ MAPJOIN(d) */ DISTINCT c.value as value order by value limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM srcpart c +JOIN srcpart d +ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') +SELECT /*+ MAPJOIN(d) */ DISTINCT c.value as value order by value limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +val_0 +val_10 +val_100 +val_103 +val_104 +val_105 +val_11 +val_111 +val_113 +val_114 +PREHOOK: query: explain +FROM srcpart c +JOIN srcpart d +ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') +SELECT /*+ MAPJOIN(d) */ DISTINCT c.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM srcpart c +JOIN srcpart d +ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') +SELECT /*+ MAPJOIN(d) */ DISTINCT c.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 
Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM srcpart c +JOIN srcpart d +ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') +SELECT /*+ MAPJOIN(d) */ DISTINCT c.value as value order by value limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM srcpart c +JOIN srcpart d +ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') +SELECT /*+ MAPJOIN(d) */ DISTINCT c.value as value order by value limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +val_0 +val_10 +val_100 +val_103 +val_104 +val_105 +val_11 +val_111 +val_113 +val_114 diff --git ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out new file mode 100644 index 0000000..e89ad85 --- /dev/null +++ ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out @@ -0,0 +1,340 @@ +PREHOOK: query: --HIVE-2101 mapjoin sometimes gives wrong results if there is a filter in the on condition + +SELECT * FROM src1 + 
RIGHT OUTER JOIN src1 src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 300) + SORT BY src1.key, src2.key, src3.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: --HIVE-2101 mapjoin sometimes gives wrong results if there is a filter in the on condition + +SELECT * FROM src1 + RIGHT OUTER JOIN src1 src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 300) + SORT BY src1.key, src2.key, src3.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +NULL NULL 128 128 val_128 +NULL NULL 128 128 val_128 +NULL NULL 128 128 val_128 +NULL NULL 146 val_146 146 val_146 +NULL NULL 146 val_146 146 val_146 +NULL NULL 150 val_150 150 val_150 +NULL NULL 213 val_213 213 val_213 +NULL NULL 213 val_213 213 val_213 +NULL NULL 224 224 val_224 +NULL NULL 224 224 val_224 +NULL NULL 238 val_238 238 val_238 +NULL NULL 238 val_238 238 val_238 +NULL NULL 255 val_255 255 val_255 +NULL NULL 255 val_255 255 val_255 +NULL NULL 273 val_273 273 val_273 +NULL NULL 273 val_273 273 val_273 +NULL NULL 273 val_273 273 val_273 +NULL NULL 278 val_278 278 val_278 +NULL NULL 278 val_278 278 val_278 +NULL NULL 66 val_66 66 val_66 +NULL NULL 98 val_98 98 val_98 +NULL NULL 98 val_98 98 val_98 +PREHOOK: query: explain +SELECT /*+ mapjoin(src1, src2) */ * FROM src1 + RIGHT OUTER JOIN src1 src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 300) + SORT BY src1.key, src2.key, src3.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT /*+ mapjoin(src1, src2) */ * FROM src1 + RIGHT OUTER JOIN src1 src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 300) + SORT BY src1.key, src2.key, src3.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 300) (type: boolean) + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 300) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 
191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 300) and (key < 10)) (type: boolean) + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 > 10)} + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + sort order: +++ + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string), _col5 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ mapjoin(src1, src2) */ * FROM src1 + RIGHT OUTER JOIN src1 src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 300) + SORT BY src1.key, src2.key, src3.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ mapjoin(src1, src2) */ * FROM src1 + RIGHT OUTER JOIN src1 src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 300) + SORT BY src1.key, src2.key, src3.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +NULL NULL 128 128 val_128 +NULL NULL 128 128 val_128 +NULL NULL 128 128 val_128 +NULL NULL 146 val_146 146 val_146 +NULL NULL 146 val_146 146 val_146 +NULL NULL 150 val_150 150 val_150 +NULL NULL 213 val_213 213 val_213 +NULL NULL 213 val_213 213 val_213 +NULL NULL 224 224 val_224 +NULL NULL 224 224 val_224 +NULL NULL 238 val_238 238 val_238 +NULL NULL 238 val_238 238 val_238 +NULL NULL 255 val_255 255 val_255 +NULL NULL 255 val_255 255 val_255 +NULL NULL 273 
val_273 273 val_273 +NULL NULL 273 val_273 273 val_273 +NULL NULL 273 val_273 273 val_273 +NULL NULL 278 val_278 278 val_278 +NULL NULL 278 val_278 278 val_278 +NULL NULL 66 val_66 66 val_66 +NULL NULL 98 val_98 98 val_98 +NULL NULL 98 val_98 98 val_98 +PREHOOK: query: explain +SELECT * FROM src1 + RIGHT OUTER JOIN src1 src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 300) + SORT BY src1.key, src2.key, src3.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM src1 + RIGHT OUTER JOIN src1 src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 300) + SORT BY src1.key, src2.key, src3.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 300) (type: boolean) + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 300) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 300) and (key < 10)) (type: boolean) + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 > 10)} + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key 
expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + sort order: +++ + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string), _col5 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM src1 + RIGHT OUTER JOIN src1 src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 300) + SORT BY src1.key, src2.key, src3.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src1 + RIGHT OUTER JOIN src1 src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 300) + SORT BY src1.key, src2.key, src3.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +NULL NULL 128 128 val_128 +NULL NULL 128 128 val_128 +NULL NULL 128 128 val_128 +NULL NULL 146 val_146 146 val_146 +NULL NULL 146 val_146 146 val_146 +NULL NULL 150 val_150 150 val_150 +NULL NULL 213 val_213 213 val_213 +NULL NULL 213 val_213 213 val_213 +NULL NULL 224 224 val_224 +NULL NULL 224 224 val_224 +NULL NULL 238 val_238 238 val_238 +NULL NULL 238 val_238 238 val_238 +NULL NULL 255 val_255 255 val_255 +NULL NULL 255 val_255 255 val_255 +NULL NULL 273 val_273 273 val_273 +NULL NULL 273 val_273 273 val_273 +NULL NULL 273 val_273 273 val_273 +NULL NULL 278 val_278 278 val_278 +NULL NULL 278 val_278 278 val_278 +NULL NULL 66 val_66 66 val_66 +NULL NULL 98 val_98 98 val_98 +NULL NULL 98 val_98 98 val_98 diff --git ql/src/test/results/clientpositive/spark/mapjoin_hook.q.out ql/src/test/results/clientpositive/spark/mapjoin_hook.q.out new file mode 100644 index 0000000..8b1c223 --- /dev/null +++ ql/src/test/results/clientpositive/spark/mapjoin_hook.q.out @@ -0,0 +1,59 @@ +PREHOOK: query: drop table dest1 +PREHOOK: type: DROPTABLE +RUN: Stage-0:DDL +PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +RUN: Stage-0:DDL +PREHOOK: query: INSERT OVERWRITE TABLE dest1 +SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 
CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 +RUN: Stage-1:MAPRED +RUN: Stage-2:DEPENDENCY_COLLECTION +RUN: Stage-0:MOVE +RUN: Stage-3:STATS +PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 +RUN: Stage-1:MAPRED +RUN: Stage-2:DEPENDENCY_COLLECTION +RUN: Stage-0:MOVE +RUN: Stage-3:STATS +PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@dest1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 +RUN: Stage-1:MAPRED +RUN: Stage-2:DEPENDENCY_COLLECTION +RUN: Stage-0:MOVE +RUN: Stage-3:STATS +PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) +INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 +RUN: Stage-1:MAPRED +RUN: Stage-2:DEPENDENCY_COLLECTION +RUN: Stage-0:MOVE +RUN: Stage-3:STATS diff --git ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out new file mode 100644 index 0000000..1801d13 --- /dev/null +++ ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out @@ -0,0 +1,816 @@ +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +explain extended select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) +PREHOOK: type: QUERY +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +explain extended select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcpart + TOK_TABREF + TOK_TABNAME + src + = + . + TOK_TABLE_OR_COL + srcpart + value + . + TOK_TABLE_OR_COL + src + value + TOK_TABREF + TOK_TABNAME + src1 + = + . 
+ TOK_TABLE_OR_COL + srcpart + key + . + TOK_TABLE_OR_COL + src1 + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + srcpart + key + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value is not null and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl 
struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + /srcpart/ds=2008-04-09/hr=11 [srcpart] + /srcpart/ds=2008-04-09/hr=12 [srcpart] + Map 4 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [src1] + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 
defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Reducer 3 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450' +PREHOOK: type: QUERY +POSTHOOK: query: explain +select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 4 (GROUP 
PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((value is not null and key is not null) and (value > 'val_450')) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value is not null and (value > 'val_450')) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select count(*) from srcpart join src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) from srcpart join src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL 
SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value is not null and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), ds (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 + outputColumnNames: _col0, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col1} + 1 + outputColumnNames: _col2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string) + outputColumnNames: _col2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: 
bigint) + outputColumnNames: _col0 + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +66 +66 +66 +66 +98 +98 +98 +98 +98 +98 +98 +98 +98 +98 +98 +98 +98 +98 +98 +98 +PREHOOK: query: select count(*) from srcpart join src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart join src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +5308 +5308 +PREHOOK: query: select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: 
default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +66 +66 +66 +66 +98 +98 +98 +98 +98 +98 +98 +98 +98 +98 +98 +98 +98 +98 +98 +98 +PREHOOK: query: select count(*) from srcpart join src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart join src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +5308 +5308 diff --git ql/src/test/results/clientpositive/spark/mapjoin_memcheck.q.out ql/src/test/results/clientpositive/spark/mapjoin_memcheck.q.out new file mode 100644 index 0000000..b04ccec --- /dev/null +++ ql/src/test/results/clientpositive/spark/mapjoin_memcheck.q.out @@ -0,0 +1,138 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table src0 like src +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src0 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table src0 like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src0 +PREHOOK: query: insert into table src0 select * from src where src.key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src0 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert into table src0 select * from src where src.key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src0 +POSTHOOK: Lineage: src0.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src0.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain +select src1.key as k1, src1.value as v1, src2.key, src2.value +from src0 src1 inner join src0 src2 on src1.key = src2.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select src1.key as k1, src1.value as v1, src2.key, src2.value +from src0 src1 inner join src0 src2 on src1.key = src2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: 
NONE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key as k1, src1.value as v1, src2.key, src2.value +from src0 src1 inner join src0 src2 on src1.key = src2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src0 +#### A masked pattern was here #### +POSTHOOK: query: select src1.key as k1, src1.value as v1, src2.key, src2.value +from src0 src1 inner join src0 src2 on src1.key = src2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src0 +#### A masked pattern was here #### +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +2 val_2 2 val_2 +4 val_4 4 val_4 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +8 val_8 8 val_8 +9 val_9 9 val_9 +PREHOOK: query: drop table src0 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src0 +PREHOOK: Output: default@src0 +POSTHOOK: query: drop table src0 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src0 +POSTHOOK: Output: default@src0 diff --git ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out new file mode 100644 index 0000000..3631911 --- /dev/null +++ ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out @@ -0,0 +1,490 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN +SELECT subq.key1, z.value +FROM +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN +SELECT subq.key1, z.value +FROM +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN 
src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col5 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + 
+PREHOOK: query: SELECT subq.key1, z.value +FROM +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: SELECT subq.key1, z.value +FROM +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +150 val_150 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +66 val_66 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +PREHOOK: query: EXPLAIN +SELECT subq.key1, z.value +FROM +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT subq.key1, z.value +FROM +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key 
(type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col5 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT subq.key1, z.value +FROM +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: SELECT subq.key1, z.value +FROM +(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 + FROM src1 x JOIN src y ON (x.key = y.key)) subq + JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart 
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +150 val_150 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +66 val_66 +98 val_98 +98 val_98 +98 val_98 +98 val_98 diff --git ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out new file mode 100644 index 0000000..4138a8a --- /dev/null +++ ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out @@ -0,0 +1,232 @@ +PREHOOK: query: drop table x +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table x +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table y +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table y +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table z +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table z +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE x (name STRING, id INT) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@x +POSTHOOK: query: CREATE TABLE x (name STRING, id INT) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@x +PREHOOK: query: CREATE TABLE y (id INT, name STRING) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@y +POSTHOOK: query: CREATE TABLE y (id INT, name STRING) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@y +PREHOOK: query: CREATE TABLE z (id INT, name STRING) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@z +POSTHOOK: query: CREATE TABLE z (id INT, name STRING) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@z +PREHOOK: query: load data local inpath '../../data/files/x.txt' INTO TABLE x +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@x +POSTHOOK: query: load data local inpath '../../data/files/x.txt' INTO TABLE x +POSTHOOK: type: LOAD +#### A masked 
pattern was here #### +POSTHOOK: Output: default@x +PREHOOK: query: load data local inpath '../../data/files/y.txt' INTO TABLE y +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@y +POSTHOOK: query: load data local inpath '../../data/files/y.txt' INTO TABLE y +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@y +PREHOOK: query: load data local inpath '../../data/files/z.txt' INTO TABLE z +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@z +POSTHOOK: query: load data local inpath '../../data/files/z.txt' INTO TABLE z +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@z +PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN +SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name +FROM +(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2 + FROM y JOIN x ON (x.id = y.id)) subq + JOIN z ON (subq.key1 = z.id) +PREHOOK: type: QUERY +POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin + +EXPLAIN +SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name +FROM +(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2 + FROM y JOIN x ON (x.id = y.id)) subq + JOIN z ON (subq.key1 = z.id) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 0 Data size: 13 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: name (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 0 Data size: 6 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: name (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 0 Data size: 6 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: name (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col6 + 
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col6 (type: int), _col5 (type: string), _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name +FROM +(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2 + FROM y JOIN x ON (x.id = y.id)) subq + JOIN z ON (subq.key1 = z.id) +PREHOOK: type: QUERY +PREHOOK: Input: default@x +PREHOOK: Input: default@y +PREHOOK: Input: default@z +#### A masked pattern was here #### +POSTHOOK: query: SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name +FROM +(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2 + FROM y JOIN x ON (x.id = y.id)) subq + JOIN z ON (subq.key1 = z.id) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@x +POSTHOOK: Input: default@y +POSTHOOK: Input: default@z +#### A masked pattern was here #### +2 Joe 2 Tie 2 Tie +2 Hank 2 Tie 2 Tie +PREHOOK: query: drop table x +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@x +PREHOOK: Output: default@x +POSTHOOK: query: drop table x +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@x +POSTHOOK: Output: default@x +PREHOOK: query: drop table y +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@y +PREHOOK: Output: default@y +POSTHOOK: query: drop table y +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@y +POSTHOOK: Output: default@y +PREHOOK: query: drop table z +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@z +PREHOOK: Output: default@z +POSTHOOK: query: drop table z +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@z +POSTHOOK: Output: default@z diff --git ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out new file mode 100644 index 0000000..19d03a0 --- /dev/null +++ ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out @@ -0,0 +1,1342 @@ +PREHOOK: query: --HIVE-2101 mapjoin sometimes gives wrong results if there is a filter in the on condition + +create table dest_1 (key STRING, 
value STRING) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_1 +POSTHOOK: query: --HIVE-2101 mapjoin sometimes gives wrong results if there is a filter in the on condition + +create table dest_1 (key STRING, value STRING) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_1 +PREHOOK: query: insert overwrite table dest_1 select * from src1 order by src1.value limit 8 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table dest_1 select * from src1 order by src1.value limit 8 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest_1 +POSTHOOK: Lineage: dest_1.key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_1.value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert into table dest_1 select "333444","555666" from src1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert into table dest_1 select "333444","555666" from src1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest_1 +POSTHOOK: Lineage: dest_1.key SIMPLE [] +POSTHOOK: Lineage: dest_1.value SIMPLE [] +PREHOOK: query: create table dest_2 (key STRING, value STRING) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_2 +POSTHOOK: query: create table dest_2 (key STRING, value STRING) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_2 +PREHOOK: query: insert into table dest_2 select * from dest_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_1 +PREHOOK: Output: default@dest_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert into table dest_2 select * from dest_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_1 +POSTHOOK: Output: default@dest_2 +POSTHOOK: Lineage: dest_2.key SIMPLE [(dest_1)dest_1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: dest_2.value SIMPLE [(dest_1)dest_1.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: SELECT * FROM src1 + RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) + JOIN dest_2 src3 ON (src2.key = src3.key) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_1 +PREHOOK: Input: default@dest_2 +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src1 + RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) + JOIN dest_2 src3 ON (src2.key = src3.key) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_1 +POSTHOOK: Input: default@dest_2 +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +NULL NULL 
333444 555666 333444 555666 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 +128 128 128 +146 val_146 146 val_146 146 val_146 +224 224 224 +369 369 369 +PREHOOK: query: explain +SELECT /*+ mapjoin(src1, src2) */ * FROM src1 + RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) + JOIN dest_2 src3 ON (src2.key = src3.key) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT /*+ mapjoin(src1, src2) */ * FROM src1 + RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) + JOIN dest_2 src3 ON (src2.key = src3.key) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 0 Data size: 49 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 49 Basic stats: PARTIAL Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 0 Data size: 49 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 49 Basic stats: PARTIAL Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, 
_col11 + Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ mapjoin(src1, src2) */ * FROM src1 + RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) + JOIN dest_2 src3 ON (src2.key = src3.key) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_1 +PREHOOK: Input: default@dest_2 +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ mapjoin(src1, src2) */ * FROM src1 + RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) + JOIN dest_2 src3 ON (src2.key = src3.key) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_1 +POSTHOOK: Input: default@dest_2 +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +NULL NULL 333444 555666 333444 555666 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 +128 128 128 +146 val_146 146 val_146 146 val_146 +224 224 224 +369 369 369 +PREHOOK: query: SELECT /*+ mapjoin(src1, src2) */ * FROM src1 + RIGHT 
OUTER JOIN dest_1 src2 ON (src1.key = src2.key) + JOIN dest_2 src3 ON (src1.key = src3.key) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_1 +PREHOOK: Input: default@dest_2 +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ mapjoin(src1, src2) */ * FROM src1 + RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) + JOIN dest_2 src3 ON (src1.key = src3.key) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_1 +POSTHOOK: Input: default@dest_2 +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 +128 128 128 +146 val_146 146 val_146 146 val_146 +224 224 224 +369 369 369 +PREHOOK: query: SELECT * FROM src1 + LEFT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) + JOIN dest_2 src3 ON (src1.key = src3.key) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_1 +PREHOOK: Input: default@dest_2 +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src1 + LEFT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) + JOIN dest_2 src3 ON (src1.key = src3.key) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_1 +POSTHOOK: Input: default@dest_2 +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_484 + val_484 + val_484 + val_484 + 
val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 +128 128 128 +146 val_146 146 val_146 146 val_146 +224 224 224 +369 369 369 +PREHOOK: query: SELECT * FROM src1 + LEFT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) + JOIN dest_2 src3 ON (src2.key = src3.key) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_1 +PREHOOK: Input: default@dest_2 +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src1 + LEFT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) + JOIN dest_2 src3 ON (src2.key = src3.key) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_1 +POSTHOOK: Input: default@dest_2 +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 +128 128 128 +146 val_146 146 val_146 146 val_146 +224 224 224 +369 369 369 +PREHOOK: query: explain +SELECT * FROM src1 + RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) + JOIN dest_2 src3 ON (src2.key = src3.key) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT * FROM src1 + RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) + JOIN dest_2 src3 ON (src2.key = src3.key) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 0 Data size: 49 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 49 Basic stats: PARTIAL Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 0 Data size: 49 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key 
(type: string) + Statistics: Num rows: 0 Data size: 49 Basic stats: PARTIAL Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM src1 + RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) + JOIN dest_2 src3 ON (src2.key = src3.key) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_1 +PREHOOK: Input: default@dest_2 +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src1 + RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) + JOIN dest_2 src3 ON (src2.key = src3.key) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_1 +POSTHOOK: Input: default@dest_2 +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +NULL NULL 333444 555666 333444 555666 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_165 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + val_193 + 
val_193 + val_193 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_265 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_27 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_409 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 + val_484 +128 128 128 +146 val_146 146 val_146 146 val_146 +224 224 224 +369 369 369 diff --git ql/src/test/results/clientpositive/spark/mergejoins.q.out ql/src/test/results/clientpositive/spark/mergejoins.q.out new file mode 100644 index 0000000..de88e0f --- /dev/null +++ ql/src/test/results/clientpositive/spark/mergejoins.q.out @@ -0,0 +1,258 @@ +PREHOOK: query: create table a (val1 int, val2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: create table a (val1 int, val2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: create table b (val1 int, val2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@b +POSTHOOK: query: create table b (val1 int, val2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@b +PREHOOK: query: create table c (val1 int, val2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@c +POSTHOOK: query: create table c (val1 int, val2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@c +PREHOOK: query: create table d (val1 int, val2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@d +POSTHOOK: query: create table d (val1 int, val2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@d +PREHOOK: query: create table e (val1 int, val2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e +POSTHOOK: query: create table e (val1 int, val2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e +PREHOOK: query: explain select * from a join b on a.val1=b.val1 join c on a.val1=c.val1 join d on a.val1=d.val1 join e on a.val2=e.val2 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from a join b on a.val1=b.val1 join c on a.val1=c.val1 join d on a.val1=d.val1 join e on a.val2=e.val2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: val1 is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: val1 (type: int) + sort order: 
+ + Map-reduce partition columns: val1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val2 (type: int) + Map 4 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: val2 is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: val2 (type: int) + sort order: + + Map-reduce partition columns: val2 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val1 (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: val1 is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: val1 (type: int) + sort order: + + Map-reduce partition columns: val1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val2 (type: int) + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: val1 is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: val1 (type: int) + sort order: + + Map-reduce partition columns: val1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val2 (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (val1 is not null and val2 is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: val1 (type: int) + sort order: + + Map-reduce partition columns: val1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val2 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Inner Join 0 to 3 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + 3 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int), _col15 (type: int), _col16 (type: int) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} {VALUE._col14} {VALUE._col15} + 1 {VALUE._col0} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16, _col20, _col21 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), 
_col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int), _col15 (type: int), _col16 (type: int), _col20 (type: int), _col21 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: --HIVE-3070 filter on outer join condition removed while merging join tree +explain select * from src a join src b on a.key=b.key left outer join src c on b.key=c.key and b.key<10 +PREHOOK: type: QUERY +POSTHOOK: query: --HIVE-3070 filter on outer join condition removed while merging join tree +explain select * from src a join src b on a.key=b.key left outer join src c on b.key=c.key and b.key<10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 < 10)} + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1100 Data size: 11686 Basic 
stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out new file mode 100644 index 0000000..1dda951 --- /dev/null +++ ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out @@ -0,0 +1,1346 @@ +PREHOOK: query: -- HIVE-3464 + +create table a (key string, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@a +POSTHOOK: query: -- HIVE-3464 + +create table a (key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@a +PREHOOK: query: -- (a-b-c-d) +explain +select * from a join a b on (a.key=b.key) left outer join a c on (b.key=c.key) left outer join a d on (a.key=d.key) +PREHOOK: type: QUERY +POSTHOOK: query: -- (a-b-c-d) +explain +select * from a join a b on (a.key=b.key) left outer join a c on (b.key=c.key) left outer join a d on (a.key=d.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join1 to 2 + Left Outer Join0 to 3 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + 3 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + 
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select * from a join a b on (a.key=b.key) left outer join a c on (b.key=c.key) right outer join a d on (a.key=d.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from a join a b on (a.key=b.key) left outer join a c on (b.key=c.key) right outer join a d on (a.key=d.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join1 to 2 + Right Outer Join0 to 3 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + 3 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: 
string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select * from a join a b on (a.key=b.key) right outer join a c on (b.key=c.key) left outer join a d on (a.key=d.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from a join a b on (a.key=b.key) right outer join a c on (b.key=c.key) left outer join a d on (a.key=d.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join1 to 2 + Left Outer Join0 to 3 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + 3 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), 
_col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select * from a join a b on (a.key=b.key) right outer join a c on (b.key=c.key) right outer join a d on (a.key=d.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from a join a b on (a.key=b.key) right outer join a c on (b.key=c.key) right outer join a d on (a.key=d.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join1 to 2 + Right Outer Join0 to 3 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + 3 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + 
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- ((a-b-d)-c) (reordered) +explain +select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) left outer join a d on (a.key=d.key) +PREHOOK: type: QUERY +POSTHOOK: query: -- ((a-b-d)-c) (reordered) +explain +select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) left outer join a d on (a.key=d.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col10 (type: string), _col11 (type: string) + Reducer 3 + Reduce Operator Tree: + 
Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} {VALUE._col9} {VALUE._col10} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col15 (type: string), _col16 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select * from a join a b on (a.key=b.key) right outer join a c on (b.value=c.key) right outer join a d on (a.key=d.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from a join a b on (a.key=b.key) right outer join a c on (b.value=c.key) right outer join a d on (a.key=d.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} 
{VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col10 (type: string), _col11 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} {VALUE._col9} {VALUE._col10} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col15 (type: string), _col16 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select * from a join a b on (a.key=b.key) full outer join a c on (b.value=c.key) full outer join a d on (a.key=d.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from a join a b on (a.key=b.key) full outer join a c on (b.value=c.key) full outer join a d on (a.key=d.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: 
NONE Column stats: NONE + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Outer Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col10 (type: string), _col11 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} {VALUE._col9} {VALUE._col10} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col15 (type: string), _col16 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- (((a-b)-c)-d) +explain +select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) right outer join a d on (a.key=d.key) +PREHOOK: type: QUERY +POSTHOOK: query: -- (((a-b)-c)-d) +explain +select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) right outer join a d on (a.key=d.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: 
string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + 
sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) full outer join a d on (a.key=d.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) full outer join a d on (a.key=d.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) 
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select * from a join a b on (a.key=b.key) right outer join a c on (b.value=c.key) left outer join a d on (a.key=d.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from a join a b on (a.key=b.key) right outer join a c on (b.value=c.key) left outer join a d on (a.key=d.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value 
(type: string) + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select * from a join a b on (a.key=b.key) right outer join a c on (b.value=c.key) full outer join a d on (a.key=d.key) +PREHOOK: 
type: QUERY +POSTHOOK: query: explain +select * from a join a b on (a.key=b.key) right outer join a c on (b.value=c.key) full outer join a d on (a.key=d.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator 
Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- ((a-b)-c-d) +explain +select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) left outer join a d on (c.key=d.key) +PREHOOK: type: QUERY +POSTHOOK: query: -- ((a-b)-c-d) +explain +select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) left outer join a d on (c.key=d.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter 
Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/multi_join_union.q.out ql/src/test/results/clientpositive/spark/multi_join_union.q.out new file mode 100644 index 0000000..72e6fb5 --- /dev/null +++ ql/src/test/results/clientpositive/spark/multi_join_union.q.out @@ -0,0 +1,175 @@ +PREHOOK: query: CREATE TABLE src11 as SELECT * FROM src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@src11 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: CREATE TABLE src11 as SELECT * FROM src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src11 +PREHOOK: query: CREATE TABLE src12 as SELECT * FROM src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@src12 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: CREATE TABLE src12 as SELECT * FROM src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@src12 +PREHOOK: query: CREATE TABLE src13 as SELECT * FROM src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@src13 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: CREATE TABLE src13 as SELECT * FROM src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src13 +PREHOOK: query: CREATE TABLE src14 as SELECT * FROM src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@src14 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: CREATE TABLE src14 as SELECT * FROM src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src14 +PREHOOK: query: EXPLAIN SELECT * FROM +src11 a JOIN +src12 b ON (a.key = b.key) JOIN +(SELECT * FROM (SELECT * FROM src13 UNION ALL SELECT * FROM src14)a )c ON c.value = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT * FROM +src11 a JOIN +src12 b ON (a.key = b.key) JOIN +(SELECT * FROM (SELECT * FROM src13 UNION ALL SELECT * FROM src14)a )c ON c.value = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Union 5 (GROUP PARTITION-LEVEL SORT, 1) + Union 5 <- Map 4 (NONE, 0), Map 7 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 8 Data size: 1603 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 8 Data size: 1603 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src14 + Filter Operator + predicate: value is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 15 Data size: 3006 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 15 Data size: 3006 Basic stats: COMPLETE Column 
stats: NONE + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: src13 + Filter Operator + predicate: value is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 16 Data size: 3306 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 16 Data size: 3306 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} + 1 {VALUE._col0} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 33 Data size: 6613 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 33 Data size: 6613 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 33 Data size: 6613 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 5 + Vertex: Union 5 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out new file mode 100644 index 0000000..6e44a91 --- /dev/null +++ ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out @@ -0,0 +1,947 @@ +PREHOOK: query: EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_FULLOUTERJOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_TABREF + TOK_TABNAME + srcpart + b + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + b + ds + '2008-04-08' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . 
+ TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string), ds (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + /srcpart/ds=2008-04-09/hr=11 [b] + /srcpart/ds=2008-04-09/hr=12 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + filter mappings: + 1 [0, 1] + filter predicates: + 0 + 1 {(VALUE._col1 = '2008-04-08')} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((((_col5 > 15) and (_col5 < 25)) and (_col0 > 10)) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + 
columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 +PREHOOK: query: EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED + FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_FULLOUTERJOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_TABREF + TOK_TABNAME + srcpart + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + = + . 
+ TOK_TABLE_OR_COL + b + ds + '2008-04-08' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string), ds (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + 
partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + /srcpart/ds=2008-04-09/hr=11 [b] + 
/srcpart/ds=2008-04-09/hr=12 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col5, _col6, _col7 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (((((_col5 > 15) and (_col5 < 25)) and (_col7 = '2008-04-08')) and (_col0 > 10)) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + FULL OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 diff --git ql/src/test/results/clientpositive/spark/parquet_join.q.out ql/src/test/results/clientpositive/spark/parquet_join.q.out new file mode 100644 index 0000000..d5a8684 --- /dev/null +++ ql/src/test/results/clientpositive/spark/parquet_join.q.out @@ -0,0 +1,367 @@ +PREHOOK: query: drop table if exists staging +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists staging +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists parquet_jointable1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists parquet_jointable1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists parquet_jointable2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists parquet_jointable2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists parquet_jointable1_bucketed_sorted +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists parquet_jointable1_bucketed_sorted +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists parquet_jointable2_bucketed_sorted +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists parquet_jointable2_bucketed_sorted +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table staging (key int, value string) stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@staging +POSTHOOK: query: create table staging (key int, value string) stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@staging +PREHOOK: query: insert into table staging select distinct key, value from src order by key limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@staging +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert into table staging select distinct key, value from src order by key limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@src +POSTHOOK: Output: default@staging +POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table parquet_jointable1 stored as parquet as select * from staging +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@staging +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_jointable1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: create table parquet_jointable1 stored as parquet as select * from staging +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@staging +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_jointable1 +PREHOOK: query: create table parquet_jointable2 stored as parquet as select key,key+1,concat(value,"value") as myvalue from staging +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@staging +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_jointable2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: create table parquet_jointable2 stored as parquet as select key,key+1,concat(value,"value") as myvalue from staging +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@staging +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_jointable2 +PREHOOK: query: -- MR join + +explain select p2.myvalue from parquet_jointable1 p1 join parquet_jointable2 p2 on p1.key=p2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- MR join + +explain select p2.myvalue from parquet_jointable1 p1 join parquet_jointable2 p2 on p1.key=p2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 4 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 217 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 217 Basic stats: COMPLETE Column stats: NONE + value expressions: myvalue (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 74 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 37 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 37 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {VALUE._col1} + outputColumnNames: _col7 + Statistics: Num rows: 40 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 
(type: string) + outputColumnNames: _col0 + Statistics: Num rows: 40 Data size: 162 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 162 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p2.myvalue from parquet_jointable1 p1 join parquet_jointable2 p2 on p1.key=p2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_jointable1 +PREHOOK: Input: default@parquet_jointable2 +#### A masked pattern was here #### +POSTHOOK: query: select p2.myvalue from parquet_jointable1 p1 join parquet_jointable2 p2 on p1.key=p2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_jointable1 +POSTHOOK: Input: default@parquet_jointable2 +#### A masked pattern was here #### +val_0value +val_10value +PREHOOK: query: -- The two tables involved in the join have differing number of columns(table1-2,table2-3). In case of Map and SMB join, +-- when the second table is loaded, the column indices in hive.io.file.readcolumn.ids refer to columns of both the first and the second table +-- and hence the parquet schema/types passed to ParquetInputSplit should contain only the column indexes belonging to second/current table + +-- Map join + +explain select p2.myvalue from parquet_jointable1 p1 join parquet_jointable2 p2 on p1.key=p2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The two tables involved in the join have differing number of columns(table1-2,table2-3). In case of Map and SMB join, +-- when the second table is loaded, the column indices in hive.io.file.readcolumn.ids refer to columns of both the first and the second table +-- and hence the parquet schema/types passed to ParquetInputSplit should contain only the column indexes belonging to second/current table + +-- Map join + +explain select p2.myvalue from parquet_jointable1 p1 join parquet_jointable2 p2 on p1.key=p2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 4 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 217 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 217 Basic stats: COMPLETE Column stats: NONE + value expressions: myvalue (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 74 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 37 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 37 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + 
Inner Join 0 to 1 + condition expressions: + 0 + 1 {VALUE._col1} + outputColumnNames: _col7 + Statistics: Num rows: 40 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 40 Data size: 162 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 162 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p2.myvalue from parquet_jointable1 p1 join parquet_jointable2 p2 on p1.key=p2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_jointable1 +PREHOOK: Input: default@parquet_jointable2 +#### A masked pattern was here #### +POSTHOOK: query: select p2.myvalue from parquet_jointable1 p1 join parquet_jointable2 p2 on p1.key=p2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_jointable1 +POSTHOOK: Input: default@parquet_jointable2 +#### A masked pattern was here #### +val_0value +val_10value +PREHOOK: query: -- SMB join + +create table parquet_jointable1_bucketed_sorted (key int,value string) clustered by (key) sorted by (key ASC) INTO 1 BUCKETS stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_jointable1_bucketed_sorted +POSTHOOK: query: -- SMB join + +create table parquet_jointable1_bucketed_sorted (key int,value string) clustered by (key) sorted by (key ASC) INTO 1 BUCKETS stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_jointable1_bucketed_sorted +PREHOOK: query: insert overwrite table parquet_jointable1_bucketed_sorted select key,concat(value,"value1") as value from staging cluster by key +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@parquet_jointable1_bucketed_sorted +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table parquet_jointable1_bucketed_sorted select key,concat(value,"value1") as value from staging cluster by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@parquet_jointable1_bucketed_sorted +POSTHOOK: Lineage: parquet_jointable1_bucketed_sorted.key SIMPLE [(staging)staging.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_jointable1_bucketed_sorted.value EXPRESSION [(staging)staging.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: create table parquet_jointable2_bucketed_sorted (key int,value1 string, value2 string) clustered by (key) sorted by (key ASC) INTO 1 BUCKETS stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_jointable2_bucketed_sorted +POSTHOOK: query: create table parquet_jointable2_bucketed_sorted (key int,value1 string, value2 string) clustered by (key) sorted by (key ASC) INTO 1 BUCKETS stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_jointable2_bucketed_sorted +PREHOOK: query: insert overwrite table parquet_jointable2_bucketed_sorted select 
key,concat(value,"value2-1") as value1,concat(value,"value2-2") as value2 from staging cluster by key +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@parquet_jointable2_bucketed_sorted +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table parquet_jointable2_bucketed_sorted select key,concat(value,"value2-1") as value1,concat(value,"value2-2") as value2 from staging cluster by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@parquet_jointable2_bucketed_sorted +POSTHOOK: Lineage: parquet_jointable2_bucketed_sorted.key SIMPLE [(staging)staging.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_jointable2_bucketed_sorted.value1 EXPRESSION [(staging)staging.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_jointable2_bucketed_sorted.value2 EXPRESSION [(staging)staging.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: explain select p1.value,p2.value2 from parquet_jointable1_bucketed_sorted p1 join parquet_jointable2_bucketed_sorted p2 on p1.key=p2.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select p1.value,p2.value2 from parquet_jointable1_bucketed_sorted p1 join parquet_jointable2_bucketed_sorted p2 on p1.key=p2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 5 Data size: 526 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: NONE + value expressions: value2 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 3 Data size: 334 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col1} + outputColumnNames: _col1, _col7 + Statistics: Num rows: 3 Data size: 346 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 346 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 346 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p1.value,p2.value2 from parquet_jointable1_bucketed_sorted p1 join parquet_jointable2_bucketed_sorted p2 on p1.key=p2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_jointable1_bucketed_sorted +PREHOOK: Input: default@parquet_jointable2_bucketed_sorted +#### A masked pattern was here #### +POSTHOOK: query: select p1.value,p2.value2 from parquet_jointable1_bucketed_sorted p1 join parquet_jointable2_bucketed_sorted p2 on p1.key=p2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_jointable1_bucketed_sorted +POSTHOOK: Input: default@parquet_jointable2_bucketed_sorted +#### A masked pattern was here #### +val_0value1 val_0value2-2 +val_10value1 val_10value2-2 diff --git ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out new file mode 100644 index 0000000..c5c34c1 --- /dev/null +++ ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out @@ -0,0 +1,250 @@ +PREHOOK: query: EXPLAIN +SELECT src1.c1, count(1) +FROM +(SELECT src.key AS c1, src.value AS c2 from src where src.key > '1' ) src1 +JOIN +(SELECT src.key AS c3, src.value AS c4 from src where src.key > '2' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +WHERE src1.c1 > '20' AND (src1.c2 < 'val_50' OR src1.c1 > '2') AND (src2.c3 > '50' OR src1.c1 < '50') AND (src2.c3 <> '4') +GROUP BY src1.c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT src1.c1, count(1) +FROM +(SELECT src.key AS c1, src.value AS c2 from src where src.key > '1' ) src1 +JOIN +(SELECT src.key AS c3, src.value AS c4 from src where src.key > '2' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +WHERE src1.c1 > '20' AND (src1.c2 < 'val_50' OR src1.c1 > '2') AND (src2.c3 > '50' OR src1.c1 < '50') AND (src2.c3 <> '4') +GROUP BY src1.c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((key > '2') and (key < '400')) and key is not null) and (key <> '4')) and (key > '20')) (type: boolean) + Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 < '400') and _col0 is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((key > '1') and (key < '400')) and key is not null) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE 
Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 < '400') and _col0 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT src1.c1, count(1) +FROM +(SELECT src.key AS c1, src.value AS c2 from src where src.key > '1' ) src1 +JOIN +(SELECT src.key AS c3, src.value AS c4 from src where src.key > '2' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +WHERE src1.c1 > '20' AND (src1.c2 < 'val_50' OR src1.c1 > '2') AND (src2.c3 > '50' OR src1.c1 < '50') AND (src2.c3 <> '4') +GROUP BY src1.c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT src1.c1, count(1) +FROM +(SELECT src.key AS c1, src.value AS c2 from src where src.key > '1' ) src1 +JOIN +(SELECT src.key AS c3, src.value AS c4 from src where src.key > '2' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +WHERE src1.c1 > '20' AND (src1.c2 < 'val_50' OR src1.c1 > '2') AND (src2.c3 > '50' OR src1.c1 < '50') AND (src2.c3 <> '4') +GROUP BY src1.c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + 
+STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((key > '2') and (key < '400')) and key is not null) and (key <> '4')) and (key > '20')) (type: boolean) + Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((key > '1') and (key < '400')) and key is not null) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/ppd_join.q.out ql/src/test/results/clientpositive/spark/ppd_join.q.out new file mode 100644 index 0000000..48ed98b --- /dev/null +++ ql/src/test/results/clientpositive/spark/ppd_join.q.out @@ -0,0 +1,1074 @@ +PREHOOK: query: EXPLAIN +SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key > '1' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key > '2' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +WHERE src1.c1 > '20' and (src1.c2 < 'val_50' or src1.c1 > '2') and (src2.c3 > '50' or src1.c1 < '50') and (src2.c3 <> '4') +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key > '1' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key > '2' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +WHERE src1.c1 > '20' and (src1.c2 < 'val_50' or src1.c1 > '2') and (src2.c3 > '50' or src1.c1 < '50') and (src2.c3 <> '4') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((key > '2') and (key < '400')) and key is not null) and (key <> '4')) and (key > '20')) (type: boolean) + Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 < '400') and _col0 is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 21 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((key > '1') and (key < '400')) and key is not null) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 < '400') and _col0 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key > '1' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key > '2' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +WHERE src1.c1 > '20' and (src1.c2 < 'val_50' or src1.c1 > '2') and (src2.c3 > '50' or src1.c1 < '50') and (src2.c3 <> '4') +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key > '1' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key > '2' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +WHERE src1.c1 > '20' and (src1.c2 < 'val_50' or src1.c1 > '2') and (src2.c3 > '50' or src1.c1 < '50') and (src2.c3 <> '4') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +200 val_200 +200 val_200 +200 val_200 +200 val_200 +201 val_201 +202 val_202 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +214 val_214 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +218 val_218 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +222 val_222 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +226 val_226 +228 val_228 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +235 val_235 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +24 val_24 +24 
val_24 +24 val_24 +24 val_24 +241 val_241 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +244 val_244 +247 val_247 +248 val_248 +249 val_249 +252 val_252 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +257 val_257 +258 val_258 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +260 val_260 +262 val_262 +263 val_263 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +274 val_274 +275 val_275 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +28 val_28 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +283 val_283 +284 val_284 +285 val_285 +286 val_286 +287 val_287 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +289 val_289 +291 val_291 +292 val_292 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +30 val_30 +302 val_302 +305 val_305 +306 val_306 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +308 val_308 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +310 val_310 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +315 val_315 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +323 val_323 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +33 val_33 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +332 val_332 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 val_339 +34 val_34 +341 val_341 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +345 val_345 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +351 val_351 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +365 val_365 +366 val_366 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +368 val_368 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +373 val_373 +374 val_374 +375 val_375 +377 val_377 +378 val_378 +379 val_379 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 
val_384 +384 val_384 +384 val_384 +384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +PREHOOK: query: EXPLAIN +SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key > '1' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key > '2' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +WHERE src1.c1 > '20' and (src1.c2 < 'val_50' or src1.c1 > '2') and (src2.c3 > '50' or src1.c1 < '50') and (src2.c3 <> '4') +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key > '1' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key > '2' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +WHERE src1.c1 > '20' and (src1.c2 < 'val_50' or src1.c1 > '2') and (src2.c3 > '50' or src1.c1 < '50') and (src2.c3 <> '4') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((key > '2') and (key < '400')) and key is not null) and (key <> '4')) and (key > '20')) (type: boolean) + Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((key > '1') and (key < '400')) and key is not null) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 9 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or 
(_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key > '1' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key > '2' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +WHERE src1.c1 > '20' and (src1.c2 < 'val_50' or src1.c1 > '2') and (src2.c3 > '50' or src1.c1 < '50') and (src2.c3 <> '4') +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key > '1' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key > '2' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +WHERE src1.c1 > '20' and (src1.c2 < 'val_50' or src1.c1 > '2') and (src2.c3 > '50' or src1.c1 < '50') and (src2.c3 <> '4') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +200 val_200 +200 val_200 +200 val_200 +200 val_200 +201 val_201 +202 val_202 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +214 val_214 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +218 val_218 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +222 val_222 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +226 val_226 +228 val_228 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +235 val_235 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +241 val_241 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +244 val_244 +247 val_247 +248 val_248 +249 val_249 +252 val_252 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +257 val_257 +258 val_258 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +260 val_260 +262 val_262 +263 val_263 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +272 val_272 
+272 val_272 +272 val_272 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +274 val_274 +275 val_275 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +28 val_28 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +283 val_283 +284 val_284 +285 val_285 +286 val_286 +287 val_287 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +289 val_289 +291 val_291 +292 val_292 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +30 val_30 +302 val_302 +305 val_305 +306 val_306 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +308 val_308 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +310 val_310 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +315 val_315 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +323 val_323 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +33 val_33 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +332 val_332 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 val_339 +34 val_34 +341 val_341 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +345 val_345 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +351 val_351 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +365 val_365 +366 val_366 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +368 val_368 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +373 val_373 +374 val_374 +375 val_375 +377 val_377 +378 val_378 +379 val_379 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +399 val_399 +399 val_399 +399 val_399 +399 val_399 diff --git ql/src/test/results/clientpositive/spark/ppd_join2.q.out 
ql/src/test/results/clientpositive/spark/ppd_join2.q.out new file mode 100644 index 0000000..21228c1 --- /dev/null +++ ql/src/test/results/clientpositive/spark/ppd_join2.q.out @@ -0,0 +1,3374 @@ +PREHOOK: query: EXPLAIN +SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key <> '302' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key <> '305' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +JOIN +(SELECT src.key as c5, src.value as c6 from src where src.key <> '306' ) src3 +ON src1.c2 = src3.c6 +WHERE src1.c1 <> '311' and (src1.c2 <> 'val_50' or src1.c1 > '1') and (src2.c3 <> '10' or src1.c1 <> '10') and (src2.c3 <> '14') and (sqrt(src3.c5) <> 13) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key <> '302' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key <> '305' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +JOIN +(SELECT src.key as c5, src.value as c6 from src where src.key <> '306' ) src3 +ON src1.c2 = src3.c6 +WHERE src1.c1 <> '311' and (src1.c2 <> 'val_50' or src1.c1 > '1') and (src2.c3 <> '10' or src1.c1 <> '10') and (src2.c3 <> '14') and (sqrt(src3.c5) <> 13) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((key <> '305') and (key < '400')) and key is not null) and (key <> '14')) and (key <> '311')) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 < '400') and _col0 is not null) (type: boolean) + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((((key <> '302') and (key < '400')) and key is not null) and value is not null) and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key <> '14')) (type: boolean) + Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 < '400') and _col0 is not null) (type: boolean) + Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: 
_col0 (type: string) + Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key <> '306') and value is not null) and (sqrt(key) <> 13)) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 8 Data size: 86 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 8 Data size: 86 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((_col0 <> '311') and ((_col1 <> 'val_50') or (_col0 > '1'))) and ((_col2 <> '10') or (_col0 <> '10'))) and (_col2 <> '14')) and (sqrt(_col4) <> 13)) (type: boolean) + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key <> '302' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key <> '305' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +JOIN +(SELECT src.key as c5, src.value as c6 from src where src.key <> '306' ) src3 +ON src1.c2 = src3.c6 +WHERE src1.c1 <> '311' and (src1.c2 <> 'val_50' or src1.c1 > '1') and (src2.c3 <> '10' or src1.c1 <> '10') and 
(src2.c3 <> '14') and (sqrt(src3.c5) <> 13) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key <> '302' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key <> '305' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +JOIN +(SELECT src.key as c5, src.value as c6 from src where src.key <> '306' ) src3 +ON src1.c2 = src3.c6 +WHERE src1.c1 <> '311' and (src1.c2 <> 'val_50' or src1.c1 > '1') and (src2.c3 <> '10' or src1.c1 <> '10') and (src2.c3 <> '14') and (sqrt(src3.c5) <> 13) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +12 val_12 +12 val_12 +12 val_12 +12 val_12 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 
+138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +175 val_175 +175 val_175 +175 val_175 +175 val_175 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +20 val_20 +200 val_200 +200 val_200 +200 val_200 +200 
val_200 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +201 val_201 +202 val_202 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +214 val_214 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +218 val_218 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +222 val_222 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +226 val_226 +228 val_228 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +235 val_235 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +237 
val_237 +237 val_237 +237 val_237 +237 val_237 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +241 val_241 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +244 val_244 +247 val_247 +248 val_248 +249 val_249 +252 val_252 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +257 val_257 +258 val_258 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +260 val_260 +262 val_262 +263 val_263 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +274 val_274 +275 val_275 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +28 val_28 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +283 val_283 +284 val_284 +285 val_285 +286 val_286 +287 val_287 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +289 val_289 +291 val_291 +292 val_292 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +30 val_30 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +308 val_308 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 
val_309 +310 val_310 +315 val_315 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +323 val_323 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +33 val_33 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +332 val_332 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 val_339 +34 val_34 +341 val_341 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +345 val_345 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 
+348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +351 val_351 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +365 val_365 +366 val_366 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +368 val_368 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +373 val_373 +374 val_374 +375 val_375 +377 val_377 +378 val_378 +379 val_379 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +4 val_4 +PREHOOK: query: EXPLAIN +SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key <> '302' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key <> '305' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +JOIN +(SELECT src.key as c5, src.value as c6 from src where src.key <> '306' ) src3 +ON src1.c2 = src3.c6 +WHERE src1.c1 <> '311' and (src1.c2 <> 'val_50' or src1.c1 > '1') and (src2.c3 <> '10' or src1.c1 <> '10') and (src2.c3 <> '14') and (sqrt(src3.c5) <> 13) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key <> '302' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key <> '305' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +JOIN +(SELECT src.key as c5, src.value as c6 from src where src.key <> '306' ) src3 +ON src1.c2 = src3.c6 +WHERE src1.c1 <> '311' and (src1.c2 <> 'val_50' or src1.c1 > '1') and (src2.c3 <> '10' or src1.c1 <> '10') and (src2.c3 <> '14') and (sqrt(src3.c5) <> 13) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + 
Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((key <> '305') and (key < '400')) and key is not null) and (key <> '14')) and (key <> '311')) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((((key <> '302') and (key < '400')) and key is not null) and value is not null) and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key <> '14')) (type: boolean) + Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 56 Data size: 594 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key <> '306') and value is not null) and (sqrt(key) <> 13)) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + 
condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((_col0 <> '311') and ((_col1 <> 'val_50') or (_col0 > '1'))) and ((_col2 <> '10') or (_col0 <> '10'))) and (_col2 <> '14')) and (sqrt(_col4) <> 13)) (type: boolean) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key <> '302' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key <> '305' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +JOIN +(SELECT src.key as c5, src.value as c6 from src where src.key <> '306' ) src3 +ON src1.c2 = src3.c6 +WHERE src1.c1 <> '311' and (src1.c2 <> 'val_50' or src1.c1 > '1') and (src2.c3 <> '10' or src1.c1 <> '10') and (src2.c3 <> '14') and (sqrt(src3.c5) <> 13) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key <> '302' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key <> '305' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +JOIN +(SELECT src.key as c5, src.value as c6 from src where src.key <> '306' ) src3 +ON src1.c2 = src3.c6 +WHERE src1.c1 <> '311' and (src1.c2 <> 'val_50' or src1.c1 > '1') and (src2.c3 <> '10' or src1.c1 <> '10') and (src2.c3 <> '14') and (sqrt(src3.c5) <> 13) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +12 
val_12 +12 val_12 +12 val_12 +12 val_12 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +175 val_175 +175 val_175 +175 val_175 +175 val_175 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +18 val_18 +18 
val_18 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +20 val_20 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +201 val_201 +202 val_202 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +214 val_214 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +218 val_218 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +222 val_222 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +226 val_226 +228 val_228 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 
+230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +235 val_235 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +241 val_241 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +244 val_244 +247 val_247 +248 val_248 +249 val_249 +252 val_252 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +257 val_257 +258 val_258 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +260 val_260 +262 val_262 +263 val_263 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +274 val_274 +275 val_275 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 
+277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +28 val_28 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +283 val_283 +284 val_284 +285 val_285 +286 val_286 +287 val_287 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +289 val_289 +291 val_291 +292 val_292 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +30 val_30 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +308 val_308 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +310 val_310 +315 val_315 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +323 val_323 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +33 val_33 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +332 val_332 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 val_339 +34 val_34 +341 val_341 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +344 val_344 +344 val_344 
+344 val_344 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +345 val_345 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +351 val_351 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +365 val_365 +366 val_366 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +368 val_368 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +373 val_373 +374 val_374 +375 val_375 +377 val_377 +378 val_378 +379 val_379 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 
val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +4 val_4 diff --git ql/src/test/results/clientpositive/spark/ppd_join3.q.out ql/src/test/results/clientpositive/spark/ppd_join3.q.out new file mode 100644 index 0000000..f95f53e --- /dev/null +++ ql/src/test/results/clientpositive/spark/ppd_join3.q.out @@ -0,0 +1,3453 @@ +PREHOOK: query: EXPLAIN +SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key <> '11' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key <> '12' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +JOIN +(SELECT src.key as c5, src.value as c6 from src where src.key <> '13' ) src3 +ON src1.c1 = src3.c5 +WHERE src1.c1 > '0' and (src1.c2 <> 'val_500' or src1.c1 > '1') and (src2.c3 > '10' or src1.c1 <> '10') and (src2.c3 <> '4') and (src3.c5 <> '1') +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key <> '11' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key <> '12' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +JOIN +(SELECT src.key as c5, src.value as c6 from src where src.key <> '13' ) src3 +ON src1.c1 = src3.c5 +WHERE src1.c1 > '0' and (src1.c2 <> 'val_500' or src1.c1 > '1') and (src2.c3 > '10' or src1.c1 <> '10') and (src2.c3 <> '4') and (src3.c5 <> '1') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((key <> '12') and (key < '400')) and key is not null) and (key <> '4')) and (key > '0')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 < '400') and _col0 is not null) (type: boolean) + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((((key <> '11') and (key < '400')) and key is not null) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: 
NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 < '400') and _col0 is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((key <> '13') and (key < '400')) and key is not null) and (key <> '1')) and (key > '0')) and (key <> '4')) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 < '400') and _col0 is not null) (type: boolean) + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((_col0 > '0') and ((_col1 <> 'val_500') or (_col0 > '1'))) and ((_col2 > '10') or (_col0 <> '10'))) and (_col2 <> '4')) and (_col4 <> '1')) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key <> '11' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key <> '12' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +JOIN +(SELECT src.key as c5, src.value as c6 from src where src.key <> '13' ) src3 +ON src1.c1 = src3.c5 +WHERE src1.c1 > '0' and (src1.c2 <> 'val_500' or src1.c1 > '1') and (src2.c3 > '10' or src1.c1 <> '10') and (src2.c3 <> '4') and (src3.c5 <> '1') +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key <> '11' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 
from src where src.key <> '12' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +JOIN +(SELECT src.key as c5, src.value as c6 from src where src.key <> '13' ) src3 +ON src1.c1 = src3.c5 +WHERE src1.c1 > '0' and (src1.c2 <> 'val_500' or src1.c1 > '1') and (src2.c3 > '10' or src1.c1 <> '10') and (src2.c3 <> '4') and (src3.c5 <> '1') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +105 val_105 +111 val_111 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +153 val_153 
+155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +175 val_175 +175 val_175 +175 val_175 +175 val_175 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 
+199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +20 val_20 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +201 val_201 +202 val_202 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +214 val_214 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +218 val_218 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +222 val_222 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +226 val_226 +228 val_228 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +233 
val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +235 val_235 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +241 val_241 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +244 val_244 +247 val_247 +248 val_248 +249 val_249 +252 val_252 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +257 val_257 +258 val_258 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +260 val_260 +262 val_262 +263 val_263 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +274 val_274 +275 val_275 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +28 val_28 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +283 val_283 +284 val_284 +285 val_285 +286 val_286 +287 val_287 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +289 val_289 +291 val_291 +292 val_292 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +30 val_30 +302 val_302 +305 val_305 +306 val_306 +307 
val_307 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +308 val_308 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +310 val_310 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +315 val_315 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +323 val_323 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +33 val_33 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +332 val_332 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 val_339 +34 val_34 +341 val_341 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +345 val_345 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 
+348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +351 val_351 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +365 val_365 +366 val_366 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +368 val_368 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +373 val_373 +374 val_374 +375 val_375 +377 val_377 +378 val_378 +379 val_379 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +PREHOOK: query: EXPLAIN +SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key <> '11' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key <> '12' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +JOIN +(SELECT src.key as c5, src.value as c6 from src where src.key <> '13' ) src3 +ON src1.c1 = src3.c5 +WHERE src1.c1 > '0' and (src1.c2 <> 'val_500' or src1.c1 > '1') and (src2.c3 > '10' or src1.c1 <> '10') and (src2.c3 <> '4') and (src3.c5 <> '1') +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT 
src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key <> '11' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key <> '12' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +JOIN +(SELECT src.key as c5, src.value as c6 from src where src.key <> '13' ) src3 +ON src1.c1 = src3.c5 +WHERE src1.c1 > '0' and (src1.c2 <> 'val_500' or src1.c1 > '1') and (src2.c3 > '10' or src1.c1 <> '10') and (src2.c3 <> '4') and (src3.c5 <> '1') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((key <> '12') and (key < '400')) and key is not null) and (key <> '4')) and (key > '0')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((((key <> '11') and (key < '400')) and key is not null) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((((key <> '13') and (key < '400')) and key is not null) and (key <> '1')) and (key > '0')) and (key <> '4')) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, 
_col2, _col3, _col4 + Statistics: Num rows: 79 Data size: 840 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((_col0 > '0') and ((_col1 <> 'val_500') or (_col0 > '1'))) and ((_col2 > '10') or (_col0 <> '10'))) and (_col2 <> '4')) and (_col4 <> '1')) (type: boolean) + Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key <> '11' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key <> '12' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +JOIN +(SELECT src.key as c5, src.value as c6 from src where src.key <> '13' ) src3 +ON src1.c1 = src3.c5 +WHERE src1.c1 > '0' and (src1.c2 <> 'val_500' or src1.c1 > '1') and (src2.c3 > '10' or src1.c1 <> '10') and (src2.c3 <> '4') and (src3.c5 <> '1') +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key <> '11' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key <> '12' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +JOIN +(SELECT src.key as c5, src.value as c6 from src where src.key <> '13' ) src3 +ON src1.c1 = src3.c5 +WHERE src1.c1 > '0' and (src1.c2 <> 'val_500' or src1.c1 > '1') and (src2.c3 > '10' or src1.c1 <> '10') and (src2.c3 <> '4') and (src3.c5 <> '1') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +105 val_105 +111 val_111 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 
+128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +175 val_175 +175 val_175 +175 val_175 +175 
val_175 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +20 val_20 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +201 val_201 +202 val_202 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +214 val_214 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +218 val_218 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +222 val_222 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +226 val_226 +228 
val_228 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +235 val_235 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +241 val_241 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +244 val_244 +247 val_247 +248 val_248 +249 val_249 +252 val_252 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +257 val_257 +258 val_258 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +260 val_260 +262 val_262 +263 val_263 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +274 val_274 +275 val_275 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 
val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +28 val_28 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +283 val_283 +284 val_284 +285 val_285 +286 val_286 +287 val_287 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +289 val_289 +291 val_291 +292 val_292 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +30 val_30 +302 val_302 +305 val_305 +306 val_306 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +308 val_308 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +310 val_310 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +315 val_315 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +323 val_323 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 
+327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +33 val_33 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +332 val_332 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 val_339 +34 val_34 +341 val_341 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +345 val_345 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +351 val_351 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +365 val_365 +366 val_366 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +368 val_368 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +373 val_373 +374 val_374 +375 val_375 +377 val_377 +378 val_378 +379 val_379 +382 val_382 
+382 val_382 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 diff --git ql/src/test/results/clientpositive/spark/ppd_join4.q.out ql/src/test/results/clientpositive/spark/ppd_join4.q.out new file mode 100644 index 0000000..36b5ed5 --- /dev/null +++ ql/src/test/results/clientpositive/spark/ppd_join4.q.out @@ -0,0 +1,136 @@ +PREHOOK: query: create table dual(a string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dual +POSTHOOK: query: create table dual(a string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dual +PREHOOK: query: drop table if exists test_tbl +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists test_tbl +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table test_tbl (id string,name string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_tbl +POSTHOOK: query: create table test_tbl (id string,name string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_tbl +PREHOOK: query: insert into table test_tbl +select 'a','b' from dual +PREHOOK: type: QUERY +PREHOOK: Input: default@dual +PREHOOK: Output: default@test_tbl +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert into table test_tbl +select 'a','b' from dual +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dual +POSTHOOK: Output: default@test_tbl +POSTHOOK: Lineage: test_tbl.id SIMPLE [] +POSTHOOK: Lineage: test_tbl.name SIMPLE [] +PREHOOK: query: explain +select t2.* +from +(select id,name from (select id,name from test_tbl) t1 sort by id) t2 +join test_tbl t3 on (t2.id=t3.id ) +where t2.name='c' and t3.id='a' +PREHOOK: type: QUERY +POSTHOOK: query: explain +select t2.* +from +(select id,name from (select id,name from test_tbl) t1 sort by id) t2 +join test_tbl t3 on (t2.id=t3.id ) +where t2.name='c' and t3.id='a' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t3 + 
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (id is not null and (id = 'a')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: 'a' (type: string) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: test_tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: ((id is not null and (name = 'c')) and (id = 'a')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: 'a' (type: string) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 'a' (type: string), 'c' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: 'a' (type: string) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select t2.* +from +(select id,name from (select id,name from test_tbl) t1 sort by id) t2 +join test_tbl t3 on (t2.id=t3.id ) +where t2.name='c' and t3.id='a' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_tbl +#### A masked pattern was here #### +POSTHOOK: query: select t2.* +from +(select id,name from (select id,name from test_tbl) t1 sort by id) t2 +join test_tbl t3 on (t2.id=t3.id ) +where t2.name='c' and t3.id='a' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_tbl +#### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/ppd_join5.q.out ql/src/test/results/clientpositive/spark/ppd_join5.q.out new file mode 100644 index 0000000..aea1b0a --- /dev/null +++ ql/src/test/results/clientpositive/spark/ppd_join5.q.out @@ -0,0 +1,267 @@ +PREHOOK: query: create table t1 (id1 string, id2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1 (id1 string, id2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: create table t2 (id string, d int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create table t2 (id string, d int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: from src tablesample (1 rows) + insert into table t1 select 'a','a' + insert into table t2 select 'a',2 +PREHOOK: type: QUERY 
+PREHOOK: Input: default@src +PREHOOK: Output: default@t1 +PREHOOK: Output: default@t2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: from src tablesample (1 rows) + insert into table t1 select 'a','a' + insert into table t2 select 'a',2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t1 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t1.id1 SIMPLE [] +POSTHOOK: Lineage: t1.id2 SIMPLE [] +POSTHOOK: Lineage: t2.d SIMPLE [] +POSTHOOK: Lineage: t2.id SIMPLE [] +PREHOOK: query: explain +select a.*,b.d d1,c.d d2 from + t1 a join t2 b on (a.id1 = b.id) + join t2 c on (a.id2 = b.id) where b.d <= 1 and c.d <= 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.*,b.d d1,c.d d2 from + t1 a join t2 b on (a.id1 = b.id) + join t2 c on (a.id2 = b.id) where b.d <= 1 and c.d <= 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 4 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (id is not null and (d <= 1)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string), id (type: string) + sort order: ++ + Map-reduce partition columns: id (type: string), id (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: d (type: int) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d <= 1) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: d (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 4 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (id1 is not null and id2 is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: id1 (type: string), id2 (type: string) + sort order: ++ + Map-reduce partition columns: id1 (type: string), id2 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int) + Reducer 3 + 
Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col6} + 1 {VALUE._col1} + outputColumnNames: _col0, _col1, _col6, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select * from ( +select a.*,b.d d1,c.d d2 from + t1 a join t2 b on (a.id1 = b.id) + join t2 c on (a.id2 = b.id) where b.d <= 1 and c.d <= 1 +) z where d1 > 1 or d2 > 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from ( +select a.*,b.d d1,c.d d2 from + t1 a join t2 b on (a.id1 = b.id) + join t2 c on (a.id2 = b.id) where b.d <= 1 and c.d <= 1 +) z where d1 > 1 or d2 > 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 4 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (id1 is not null and id2 is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: id1 (type: string), id2 (type: string) + sort order: ++ + Map-reduce partition columns: id1 (type: string), id2 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 4 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (id is not null and (d <= 1)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: id (type: string), id (type: string) + sort order: ++ + Map-reduce partition columns: id (type: string), id (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: d (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d <= 1) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: d (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce 
Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col6} + 1 {VALUE._col1} + outputColumnNames: _col0, _col1, _col6, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: ((_col6 > 1) or (_col11 > 1)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from ( +select a.*,b.d d1,c.d d2 from + t1 a join t2 b on (a.id1 = b.id) + join t2 c on (a.id2 = b.id) where b.d <= 1 and c.d <= 1 +) z where d1 > 1 or d2 > 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from ( +select a.*,b.d d1,c.d d2 from + t1 a join t2 b on (a.id1 = b.id) + join t2 c on (a.id2 = b.id) where b.d <= 1 and c.d <= 1 +) z where d1 > 1 or d2 > 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out new file mode 100644 index 0000000..c6cb1b4 --- /dev/null +++ ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out @@ -0,0 +1,1508 @@ +PREHOOK: query: explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + k + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 1 + k1 + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 2 + k2 + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 3 + k3 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + < + . + TOK_TABLE_OR_COL + b + k1 + 5 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + k2 + TOK_SELEXPR + . 
+ TOK_TABLE_OR_COL + b + k3 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(key) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + 
Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col2} {VALUE._col3} + outputColumnNames: _col0, _col8, _col9 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:double:double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col2 < 5.0) and _col0 is not null) (type: boolean) + Statistics: Num rows: 21 Data size: 223 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce 
partition columns: _col0 (type: string) + Statistics: Num rows: 21 Data size: 223 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), _col4 (type: double) + auto parallelism: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 2.0 3.0 +0 2.0 3.0 +0 2.0 3.0 +2 4.0 5.0 +PREHOOK: query: explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + k + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 1 + k1 + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 2 + k2 + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 3 + k3 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + < + . + TOK_TABLE_OR_COL + b + k1 + 5 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + k2 + TOK_SELEXPR + . 
+ TOK_TABLE_OR_COL + b + k3 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(key) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + 
Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col2} {VALUE._col3} + outputColumnNames: _col0, _col8, _col9 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:double:double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (_col2 < 5.0) (type: boolean) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: 
string) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), _col4 (type: double) + auto parallelism: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 2.0 3.0 +0 2.0 3.0 +0 2.0 3.0 +2 4.0 5.0 +PREHOOK: query: explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + k + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 1 + k1 + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 2 + k2 + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 3 + k3 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + < + . + TOK_TABLE_OR_COL + b + k1 + 5 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + k2 + TOK_SELEXPR + . 
+ TOK_TABLE_OR_COL + b + k3 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(key) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + 
properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col2} {VALUE._col3} + outputColumnNames: _col0, _col8, _col9 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:double:double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col2 < 5.0) and _col0 is not null) (type: boolean) + Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), 
_col4 (type: double) + auto parallelism: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 2.0 3.0 +0 2.0 3.0 +0 2.0 3.0 +2 4.0 5.0 +PREHOOK: query: explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + k + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 1 + k1 + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 2 + k2 + TOK_SELEXPR + + + TOK_FUNCTION + min + TOK_TABLE_OR_COL + key + 3 + k3 + TOK_GROUPBY + TOK_TABLE_OR_COL + key + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + < + . + TOK_TABLE_OR_COL + b + k1 + 5 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + k2 + TOK_SELEXPR + . 
+ TOK_TABLE_OR_COL + b + k3 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(key) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + 
Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col2} {VALUE._col3} + outputColumnNames: _col0, _col8, _col9 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:double:double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (_col2 < 5.0) (type: boolean) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: 
string) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), _col4 (type: double) + auto parallelism: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select a.key, b.k2, b.k3 +from src a +join ( +select key, +min(key) as k, +min(key)+1 as k1, +min(key)+2 as k2, +min(key)+3 as k3 +from src +group by key +) b +on a.key=b.key and b.k1 < 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 2.0 3.0 +0 2.0 3.0 +0 2.0 3.0 +2 4.0 5.0 diff --git ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out new file mode 100644 index 0000000..76b541d --- /dev/null +++ ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out @@ -0,0 +1,228 @@ +PREHOOK: query: EXPLAIN + FROM + src a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN + FROM + src a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col0 > 10) and (_col0 < 20)) and (_col5 > 15)) and (_col5 < 25)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: 
string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +PREHOOK: query: EXPLAIN + FROM + src a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN + FROM + src a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 > 15) and (_col5 < 25)) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE 
+ Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 diff --git ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out new file mode 100644 index 0000000..6a66c83 --- /dev/null +++ ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out @@ -0,0 +1,468 @@ +PREHOOK: query: EXPLAIN + FROM + src a + RIGHT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN + FROM + src a + RIGHT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '15') and (key < '25')) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '15') and (key < '25')) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 
{KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col0 > '10') and (_col0 < '20')) and (_col5 > '15')) and (_col5 < '25')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + RIGHT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + RIGHT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +150 val_150 150 val_150 +152 val_152 152 val_152 +152 val_152 152 val_152 +152 val_152 152 val_152 +152 val_152 152 val_152 +153 val_153 153 val_153 +155 val_155 155 val_155 +156 val_156 156 val_156 +157 val_157 157 val_157 +158 val_158 158 val_158 +160 val_160 160 val_160 +162 val_162 162 val_162 +163 val_163 163 val_163 +164 val_164 164 val_164 +164 val_164 164 val_164 +164 val_164 164 val_164 +164 val_164 164 val_164 +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 165 val_165 +166 val_166 166 val_166 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +168 val_168 168 val_168 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +17 val_17 17 val_17 +170 val_170 170 val_170 +172 val_172 172 val_172 +172 val_172 172 val_172 +172 val_172 172 val_172 +172 val_172 172 val_172 +174 val_174 174 val_174 +174 val_174 174 val_174 +174 val_174 174 val_174 +174 val_174 174 val_174 +175 val_175 175 val_175 +175 val_175 175 val_175 +175 val_175 175 val_175 +175 val_175 175 val_175 +176 val_176 176 val_176 +176 val_176 176 val_176 +176 val_176 176 val_176 +176 val_176 176 val_176 +177 val_177 177 val_177 +178 val_178 178 val_178 +179 val_179 179 val_179 +179 val_179 179 val_179 +179 val_179 179 val_179 +179 val_179 179 val_179 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +180 val_180 180 val_180 +181 val_181 181 val_181 +183 val_183 
183 val_183 +186 val_186 186 val_186 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +189 val_189 189 val_189 +19 val_19 19 val_19 +190 val_190 190 val_190 +191 val_191 191 val_191 +191 val_191 191 val_191 +191 val_191 191 val_191 +191 val_191 191 val_191 +192 val_192 192 val_192 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +194 val_194 194 val_194 +195 val_195 195 val_195 +195 val_195 195 val_195 +195 val_195 195 val_195 +195 val_195 195 val_195 +196 val_196 196 val_196 +197 val_197 197 val_197 +197 val_197 197 val_197 +197 val_197 197 val_197 +197 val_197 197 val_197 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +2 val_2 2 val_2 +PREHOOK: query: EXPLAIN + FROM + src a + RIGHT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN + FROM + src a + RIGHT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '15') and (key < '25')) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '15') and (key < '25')) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 > '10') and (_col0 < '20')) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), 
_col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + RIGHT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + RIGHT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +150 val_150 150 val_150 +152 val_152 152 val_152 +152 val_152 152 val_152 +152 val_152 152 val_152 +152 val_152 152 val_152 +153 val_153 153 val_153 +155 val_155 155 val_155 +156 val_156 156 val_156 +157 val_157 157 val_157 +158 val_158 158 val_158 +160 val_160 160 val_160 +162 val_162 162 val_162 +163 val_163 163 val_163 +164 val_164 164 val_164 +164 val_164 164 val_164 +164 val_164 164 val_164 +164 val_164 164 val_164 +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 165 val_165 +166 val_166 166 val_166 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +168 val_168 168 val_168 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +17 val_17 17 val_17 +170 val_170 170 val_170 +172 val_172 172 val_172 +172 val_172 172 val_172 +172 val_172 172 val_172 +172 val_172 172 val_172 +174 val_174 174 val_174 +174 val_174 174 val_174 +174 val_174 174 val_174 +174 val_174 174 val_174 +175 val_175 175 val_175 +175 val_175 175 val_175 +175 val_175 175 val_175 +175 val_175 175 val_175 +176 val_176 176 val_176 +176 val_176 176 val_176 +176 val_176 176 val_176 +176 val_176 176 val_176 +177 val_177 177 val_177 +178 val_178 178 val_178 +179 val_179 179 val_179 +179 val_179 179 val_179 +179 val_179 179 val_179 +179 val_179 179 val_179 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +180 val_180 180 val_180 +181 val_181 181 val_181 +183 val_183 183 val_183 +186 val_186 186 val_186 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +189 val_189 189 val_189 +19 val_19 19 val_19 +190 val_190 190 val_190 +191 val_191 191 val_191 +191 val_191 191 val_191 +191 val_191 191 val_191 +191 val_191 191 val_191 +192 val_192 192 
val_192 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +194 val_194 194 val_194 +195 val_195 195 val_195 +195 val_195 195 val_195 +195 val_195 195 val_195 +195 val_195 195 val_195 +196 val_196 196 val_196 +197 val_197 197 val_197 +197 val_197 197 val_197 +197 val_197 197 val_197 +197 val_197 197 val_197 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +2 val_2 2 val_2 diff --git ql/src/test/results/clientpositive/spark/ppd_outer_join3.q.out ql/src/test/results/clientpositive/spark/ppd_outer_join3.q.out new file mode 100644 index 0000000..d783562 --- /dev/null +++ ql/src/test/results/clientpositive/spark/ppd_outer_join3.q.out @@ -0,0 +1,456 @@ +PREHOOK: query: EXPLAIN + FROM + src a + FULL OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN + FROM + src a + FULL OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col0 > '10') and (_col0 < '20')) and (_col5 > '15')) and (_col5 < '25')) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + FULL OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + FULL OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +150 val_150 150 val_150 +152 val_152 152 val_152 +152 val_152 152 val_152 +152 val_152 152 val_152 +152 val_152 152 val_152 +153 val_153 153 val_153 +155 val_155 155 val_155 +156 val_156 156 val_156 +157 val_157 157 val_157 +158 val_158 158 val_158 +160 val_160 160 val_160 +162 val_162 162 val_162 +163 val_163 163 val_163 +164 val_164 164 val_164 +164 val_164 164 val_164 +164 val_164 164 val_164 +164 val_164 164 val_164 +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 165 val_165 +166 val_166 166 val_166 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +168 val_168 168 val_168 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +17 val_17 17 val_17 +170 val_170 170 val_170 +172 val_172 172 val_172 +172 val_172 172 val_172 +172 val_172 172 val_172 +172 val_172 172 val_172 +174 val_174 174 val_174 +174 val_174 174 val_174 +174 val_174 174 val_174 +174 val_174 174 val_174 +175 val_175 175 val_175 +175 val_175 175 val_175 +175 val_175 175 val_175 +175 val_175 175 val_175 +176 val_176 176 val_176 +176 val_176 176 val_176 +176 val_176 176 val_176 +176 val_176 176 val_176 +177 val_177 177 val_177 +178 val_178 178 val_178 +179 val_179 179 val_179 +179 val_179 179 val_179 +179 val_179 179 val_179 +179 val_179 179 val_179 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +180 val_180 180 val_180 +181 val_181 181 val_181 +183 val_183 183 val_183 +186 val_186 186 val_186 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +189 val_189 189 val_189 +19 val_19 19 val_19 +190 val_190 190 val_190 +191 val_191 191 val_191 +191 val_191 191 val_191 +191 val_191 191 val_191 +191 val_191 191 val_191 +192 val_192 192 val_192 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +194 val_194 194 val_194 +195 val_195 195 val_195 +195 val_195 195 val_195 +195 val_195 195 val_195 +195 val_195 195 val_195 +196 val_196 196 val_196 +197 val_197 197 
val_197 +197 val_197 197 val_197 +197 val_197 197 val_197 +197 val_197 197 val_197 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +2 val_2 2 val_2 +PREHOOK: query: EXPLAIN + FROM + src a + FULL OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN + FROM + src a + FULL OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col5 > '15') and (_col5 < '25')) and (_col0 > '10')) and (_col0 < '20')) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + FULL OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + FULL OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked 
pattern was here #### +150 val_150 150 val_150 +152 val_152 152 val_152 +152 val_152 152 val_152 +152 val_152 152 val_152 +152 val_152 152 val_152 +153 val_153 153 val_153 +155 val_155 155 val_155 +156 val_156 156 val_156 +157 val_157 157 val_157 +158 val_158 158 val_158 +160 val_160 160 val_160 +162 val_162 162 val_162 +163 val_163 163 val_163 +164 val_164 164 val_164 +164 val_164 164 val_164 +164 val_164 164 val_164 +164 val_164 164 val_164 +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 165 val_165 +166 val_166 166 val_166 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +168 val_168 168 val_168 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +17 val_17 17 val_17 +170 val_170 170 val_170 +172 val_172 172 val_172 +172 val_172 172 val_172 +172 val_172 172 val_172 +172 val_172 172 val_172 +174 val_174 174 val_174 +174 val_174 174 val_174 +174 val_174 174 val_174 +174 val_174 174 val_174 +175 val_175 175 val_175 +175 val_175 175 val_175 +175 val_175 175 val_175 +175 val_175 175 val_175 +176 val_176 176 val_176 +176 val_176 176 val_176 +176 val_176 176 val_176 +176 val_176 176 val_176 +177 val_177 177 val_177 +178 val_178 178 val_178 +179 val_179 179 val_179 +179 val_179 179 val_179 +179 val_179 179 val_179 +179 val_179 179 val_179 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +180 val_180 180 val_180 +181 val_181 181 val_181 +183 val_183 183 val_183 +186 val_186 186 val_186 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +189 val_189 189 val_189 +19 val_19 19 val_19 +190 val_190 190 val_190 +191 val_191 191 val_191 +191 val_191 191 val_191 +191 val_191 191 val_191 +191 val_191 191 val_191 +192 val_192 192 val_192 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +194 val_194 194 val_194 +195 val_195 195 val_195 +195 val_195 195 val_195 +195 val_195 195 val_195 +195 val_195 195 val_195 +196 val_196 196 val_196 +197 val_197 197 val_197 +197 val_197 197 val_197 +197 val_197 197 val_197 +197 val_197 197 val_197 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +2 val_2 2 val_2 diff --git ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out new file mode 100644 index 0000000..e235fca --- /dev/null +++ ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out @@ -0,0 +1,730 @@ +PREHOOK: query: EXPLAIN + FROM + src a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + RIGHT OUTER JOIN + src c + ON (a.key = c.key) 
+ SELECT a.key, a.value, b.key, b.value, c.key + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN + FROM + src a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + RIGHT OUTER JOIN + src c + ON (a.key = c.key) + SELECT a.key, a.value, b.key, b.value, c.key + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sqrt(key) <> 13) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sqrt(key) <> 13) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sqrt(key) <> 13) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((_col0 > '10') and (_col0 < '20')) and (_col5 > '15')) and (_col5 < '25')) and (sqrt(_col10) <> 13)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + RIGHT OUTER JOIN + src c + ON (a.key = c.key) + SELECT a.key, a.value, b.key, b.value, c.key + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + RIGHT OUTER JOIN + src c + ON (a.key = c.key) + SELECT a.key, a.value, b.key, b.value, c.key + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +150 val_150 150 val_150 150 +152 val_152 152 val_152 152 +152 val_152 152 val_152 152 +152 val_152 152 val_152 152 +152 val_152 152 val_152 152 +152 val_152 152 val_152 152 +152 val_152 152 val_152 152 +152 val_152 152 val_152 152 +152 val_152 152 val_152 152 +153 val_153 153 val_153 153 +155 val_155 155 val_155 155 +156 val_156 156 val_156 156 +157 val_157 157 val_157 157 +158 val_158 158 val_158 158 +160 val_160 160 val_160 160 +162 val_162 162 val_162 162 +163 val_163 163 val_163 163 +164 val_164 164 val_164 164 +164 val_164 164 val_164 164 +164 val_164 164 val_164 164 +164 val_164 164 val_164 164 +164 val_164 164 val_164 164 +164 val_164 164 val_164 164 +164 val_164 164 val_164 164 +164 val_164 164 val_164 164 +165 val_165 165 val_165 165 +165 val_165 165 val_165 165 +165 val_165 165 val_165 165 +165 val_165 165 val_165 165 +165 val_165 165 val_165 165 +165 val_165 165 val_165 165 +165 val_165 165 val_165 165 +165 val_165 165 val_165 165 +166 val_166 166 val_166 166 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +168 val_168 168 val_168 168 +17 val_17 17 val_17 17 +170 val_170 170 val_170 170 +172 val_172 172 val_172 172 +172 val_172 172 val_172 172 +172 val_172 172 val_172 172 +172 val_172 172 val_172 172 +172 val_172 172 val_172 172 +172 val_172 172 val_172 172 +172 val_172 172 val_172 172 +172 val_172 172 val_172 172 +174 val_174 174 val_174 174 +174 val_174 174 val_174 174 +174 val_174 174 val_174 174 +174 val_174 174 val_174 174 +174 val_174 174 val_174 174 +174 val_174 174 val_174 174 +174 val_174 174 val_174 174 +174 val_174 174 val_174 174 +175 val_175 175 val_175 175 +175 val_175 175 val_175 175 +175 val_175 175 val_175 175 +175 val_175 175 val_175 175 +175 val_175 175 val_175 175 +175 val_175 175 val_175 175 +175 val_175 175 val_175 175 +175 val_175 175 val_175 175 +176 val_176 176 val_176 176 +176 val_176 176 val_176 176 +176 val_176 176 val_176 176 +176 val_176 176 val_176 176 +176 val_176 176 val_176 176 +176 val_176 176 
val_176 176 +176 val_176 176 val_176 176 +176 val_176 176 val_176 176 +177 val_177 177 val_177 177 +178 val_178 178 val_178 178 +179 val_179 179 val_179 179 +179 val_179 179 val_179 179 +179 val_179 179 val_179 179 +179 val_179 179 val_179 179 +179 val_179 179 val_179 179 +179 val_179 179 val_179 179 +179 val_179 179 val_179 179 +179 val_179 179 val_179 179 +18 val_18 18 val_18 18 +18 val_18 18 val_18 18 +18 val_18 18 val_18 18 +18 val_18 18 val_18 18 +18 val_18 18 val_18 18 +18 val_18 18 val_18 18 +18 val_18 18 val_18 18 +18 val_18 18 val_18 18 +180 val_180 180 val_180 180 +181 val_181 181 val_181 181 +183 val_183 183 val_183 183 +186 val_186 186 val_186 186 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +189 val_189 189 val_189 189 +19 val_19 19 val_19 19 +190 val_190 190 val_190 190 +191 val_191 191 val_191 191 +191 val_191 191 val_191 191 +191 val_191 191 val_191 191 +191 val_191 191 val_191 191 +191 val_191 191 val_191 191 +191 val_191 191 val_191 191 +191 val_191 191 val_191 191 +191 val_191 191 val_191 191 +192 val_192 192 val_192 192 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +194 val_194 194 val_194 194 +195 val_195 195 val_195 195 +195 val_195 195 val_195 195 +195 val_195 195 val_195 195 +195 val_195 195 val_195 195 +195 val_195 195 val_195 195 +195 val_195 195 val_195 195 +195 val_195 195 val_195 195 +195 val_195 195 val_195 195 +196 val_196 196 val_196 196 +197 val_197 197 val_197 197 +197 val_197 197 val_197 197 +197 val_197 197 val_197 197 +197 val_197 197 val_197 197 +197 val_197 197 val_197 197 +197 val_197 197 val_197 197 +197 val_197 197 val_197 197 +197 val_197 197 val_197 197 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 
val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +2 val_2 2 val_2 2 +PREHOOK: query: EXPLAIN + FROM + src a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + RIGHT OUTER JOIN + src c + ON (a.key = c.key) + SELECT a.key, a.value, b.key, b.value, c.key + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN + FROM + src a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + RIGHT OUTER JOIN + src c + ON (a.key = c.key) + SELECT a.key, a.value, b.key, b.value, c.key + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sqrt(key) <> 13) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sqrt(key) <> 13) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sqrt(key) <> 13) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col5 > '15') and (_col5 < '25')) and (_col0 > '10')) and (_col0 < '20')) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 
(type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + RIGHT OUTER JOIN + src c + ON (a.key = c.key) + SELECT a.key, a.value, b.key, b.value, c.key + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + LEFT OUTER JOIN + src b + ON (a.key = b.key) + RIGHT OUTER JOIN + src c + ON (a.key = c.key) + SELECT a.key, a.value, b.key, b.value, c.key + WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +150 val_150 150 val_150 150 +152 val_152 152 val_152 152 +152 val_152 152 val_152 152 +152 val_152 152 val_152 152 +152 val_152 152 val_152 152 +152 val_152 152 val_152 152 +152 val_152 152 val_152 152 +152 val_152 152 val_152 152 +152 val_152 152 val_152 152 +153 val_153 153 val_153 153 +155 val_155 155 val_155 155 +156 val_156 156 val_156 156 +157 val_157 157 val_157 157 +158 val_158 158 val_158 158 +160 val_160 160 val_160 160 +162 val_162 162 val_162 162 +163 val_163 163 val_163 163 +164 val_164 164 val_164 164 +164 val_164 164 val_164 164 +164 val_164 164 val_164 164 +164 val_164 164 val_164 164 +164 val_164 164 val_164 164 +164 val_164 164 val_164 164 +164 val_164 164 val_164 164 +164 val_164 164 val_164 164 +165 val_165 165 val_165 165 +165 val_165 165 val_165 165 +165 val_165 165 val_165 165 +165 val_165 165 val_165 165 +165 val_165 165 val_165 165 +165 val_165 165 val_165 165 +165 val_165 165 val_165 165 +165 val_165 165 val_165 165 +166 val_166 166 val_166 166 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +167 val_167 167 val_167 167 +168 val_168 168 val_168 168 +17 val_17 17 val_17 17 +170 val_170 170 val_170 170 +172 val_172 172 val_172 172 +172 val_172 172 val_172 172 +172 val_172 172 val_172 172 +172 val_172 172 val_172 172 +172 val_172 172 val_172 172 +172 val_172 172 val_172 172 +172 val_172 172 val_172 172 +172 val_172 172 val_172 172 +174 val_174 174 val_174 174 +174 val_174 174 val_174 174 +174 val_174 174 val_174 174 +174 val_174 174 val_174 174 +174 val_174 174 val_174 174 +174 val_174 174 val_174 174 
+174 val_174 174 val_174 174 +174 val_174 174 val_174 174 +175 val_175 175 val_175 175 +175 val_175 175 val_175 175 +175 val_175 175 val_175 175 +175 val_175 175 val_175 175 +175 val_175 175 val_175 175 +175 val_175 175 val_175 175 +175 val_175 175 val_175 175 +175 val_175 175 val_175 175 +176 val_176 176 val_176 176 +176 val_176 176 val_176 176 +176 val_176 176 val_176 176 +176 val_176 176 val_176 176 +176 val_176 176 val_176 176 +176 val_176 176 val_176 176 +176 val_176 176 val_176 176 +176 val_176 176 val_176 176 +177 val_177 177 val_177 177 +178 val_178 178 val_178 178 +179 val_179 179 val_179 179 +179 val_179 179 val_179 179 +179 val_179 179 val_179 179 +179 val_179 179 val_179 179 +179 val_179 179 val_179 179 +179 val_179 179 val_179 179 +179 val_179 179 val_179 179 +179 val_179 179 val_179 179 +18 val_18 18 val_18 18 +18 val_18 18 val_18 18 +18 val_18 18 val_18 18 +18 val_18 18 val_18 18 +18 val_18 18 val_18 18 +18 val_18 18 val_18 18 +18 val_18 18 val_18 18 +18 val_18 18 val_18 18 +180 val_180 180 val_180 180 +181 val_181 181 val_181 181 +183 val_183 183 val_183 183 +186 val_186 186 val_186 186 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +187 val_187 187 val_187 187 +189 val_189 189 val_189 189 +19 val_19 19 val_19 19 +190 val_190 190 val_190 190 +191 val_191 191 val_191 191 +191 val_191 191 val_191 191 +191 val_191 191 val_191 191 +191 val_191 191 val_191 191 +191 val_191 191 val_191 191 +191 val_191 191 val_191 191 +191 val_191 191 val_191 191 +191 val_191 191 val_191 191 +192 val_192 192 val_192 192 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +193 val_193 193 val_193 193 +194 val_194 194 val_194 194 +195 val_195 195 val_195 195 +195 val_195 195 val_195 195 +195 val_195 195 val_195 195 +195 val_195 195 val_195 195 +195 val_195 195 val_195 195 +195 val_195 195 val_195 195 +195 val_195 195 val_195 195 +195 val_195 195 val_195 195 +196 val_196 196 val_196 196 +197 val_197 197 val_197 197 +197 val_197 197 val_197 197 +197 val_197 197 val_197 197 +197 val_197 197 val_197 197 +197 val_197 197 val_197 197 +197 val_197 197 val_197 197 +197 val_197 197 val_197 197 +197 val_197 197 val_197 197 
+199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +199 val_199 199 val_199 199 +2 val_2 2 val_2 2 diff --git ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out new file mode 100644 index 0000000..1316b8a --- /dev/null +++ ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out @@ -0,0 +1,320 @@ +PREHOOK: query: create table t1 (id int, key string, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1 (id int, key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: create table t2 (id int, key string, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create table t2 (id int, key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: create table t3 (id int, key string, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t3 +POSTHOOK: query: create table t3 (id int, key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t3 +PREHOOK: query: create table t4 (id int, key string, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t4 +POSTHOOK: query: create table t4 (id int, key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t4 +PREHOOK: query: explain select * from t1 full outer join t2 on t1.id=t2.id join t3 on t2.id=t3.id where t3.id=20 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from t1 full outer join t2 on t1.id=t2.id join t3 on t2.id=t3.id where t3.id=20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (id = 20) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: 20 (type: int) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE + Filter Operator + predicate: (id = 20) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (id = 20) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + 2 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8, _col13, _col14 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string), 20 (type: int), _col13 (type: string), _col14 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t2.id=t3.id) where t2.id=20 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t2.id=t3.id) where t2.id=20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (id = 20) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (id = 20) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key 
expressions: 20 (type: int) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (id = 20) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: 20 (type: int) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + 2 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + outputColumnNames: _col1, _col2, _col7, _col8, _col12, _col13, _col14 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col7 (type: string), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t1.id=t3.id) where t2.id=20 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t1.id=t3.id) where t2.id=20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (id = 20) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (id = 20) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: 20 (type: int) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: 
NONE + Filter Operator + predicate: (id = 20) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: 20 (type: int) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join0 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + 2 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + outputColumnNames: _col1, _col2, _col7, _col8, _col12, _col13, _col14 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col7 (type: string), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: drop table t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 +PREHOOK: query: drop table t2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: drop table t2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t2 +POSTHOOK: Output: default@t2 +PREHOOK: query: drop table t3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t3 +PREHOOK: Output: default@t3 +POSTHOOK: query: drop table t3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t3 +POSTHOOK: Output: default@t3 +PREHOOK: query: drop table t4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t4 +PREHOOK: Output: default@t4 +POSTHOOK: query: drop table t4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t4 +POSTHOOK: Output: default@t4 diff --git ql/src/test/results/clientpositive/spark/reduce_deduplicate_exclude_join.q.out ql/src/test/results/clientpositive/spark/reduce_deduplicate_exclude_join.q.out new file mode 100644 index 0000000..0a3c821 --- /dev/null +++ ql/src/test/results/clientpositive/spark/reduce_deduplicate_exclude_join.q.out @@ -0,0 +1,91 @@ +PREHOOK: query: explain select * from (select * from src cluster by key) a join src b on a.key = b.key limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from (select * from src cluster by key) a join src b on a.key = b.key limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not 
null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/router_join_ppr.q.out ql/src/test/results/clientpositive/spark/router_join_ppr.q.out new file mode 100644 index 0000000..8389a81 --- /dev/null +++ ql/src/test/results/clientpositive/spark/router_join_ppr.q.out @@ -0,0 +1,1725 @@ +PREHOOK: query: EXPLAIN EXTENDED + FROM + src a + RIGHT OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED + FROM + src a + RIGHT OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + 
TOK_RIGHTOUTERJOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_TABREF + TOK_TABNAME + srcpart + b + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + b + ds + '2008-04-08' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string), ds (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + 
columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + 
input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + /srcpart/ds=2008-04-09/hr=11 [b] + /srcpart/ds=2008-04-09/hr=12 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + filter mappings: + 1 [0, 1] + filter predicates: + 0 + 1 {(VALUE._col1 = '2008-04-08')} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + 
Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + RIGHT OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + RIGHT OUTER JOIN + srcpart b + ON (a.key = b.key AND b.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 +PREHOOK: query: EXPLAIN EXTENDED + FROM + srcpart a + RIGHT OUTER JOIN + src b + ON (a.key = b.key AND a.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED + FROM + srcpart a + RIGHT OUTER JOIN + src b + ON (a.key = b.key AND a.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_RIGHTOUTERJOIN + TOK_TABREF + TOK_TABNAME + srcpart + a + TOK_TABREF + TOK_TABNAME + src + b + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + ds + '2008-04-08' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . 
+ TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [a] + /srcpart/ds=2008-04-08/hr=12 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col7, _col8 + Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + srcpart a + RIGHT OUTER JOIN + src b + ON (a.key = b.key AND a.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM + srcpart a + RIGHT OUTER JOIN + src b + ON (a.key = b.key AND a.ds = '2008-04-08') + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 +PREHOOK: query: EXPLAIN EXTENDED + FROM + src a + RIGHT OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED + FROM + src a + RIGHT OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_RIGHTOUTERJOIN + TOK_TABREF + TOK_TABNAME + src + a + TOK_TABREF + TOK_TABNAME + srcpart + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + = + . 
+ TOK_TABLE_OR_COL + b + ds + '2008-04-08' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + 
bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE 
Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + src a + RIGHT OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM + src a + RIGHT OUTER JOIN + srcpart b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND b.ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 +PREHOOK: query: EXPLAIN EXTENDED + FROM + srcpart a + RIGHT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND a.ds = '2008-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED + FROM + srcpart a + RIGHT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND a.ds = '2008-04-08' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_RIGHTOUTERJOIN + TOK_TABREF + TOK_TABNAME + srcpart + a + TOK_TABREF + TOK_TABNAME + src + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_WHERE + AND + AND + AND + AND + > + . + TOK_TABLE_OR_COL + a + key + 10 + < + . + TOK_TABLE_OR_COL + a + key + 20 + > + . + TOK_TABLE_OR_COL + b + key + 15 + < + . + TOK_TABLE_OR_COL + b + key + 25 + = + . 
+ TOK_TABLE_OR_COL + a + ds + '2008-04-08' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [b] + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 15) and (key < 25)) (type: boolean) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string), ds (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 
defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [a] + /srcpart/ds=2008-04-08/hr=12 [a] + /srcpart/ds=2008-04-09/hr=11 [a] + /srcpart/ds=2008-04-09/hr=12 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col7, _col8 + Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (((_col0 > 10) and (_col0 < 20)) and (_col2 = '2008-04-08')) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM + srcpart a + RIGHT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND a.ds = '2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: FROM + srcpart a + RIGHT OUTER JOIN + src b + ON (a.key = b.key) + SELECT a.key, a.value, b.key, b.value + WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 AND a.ds = '2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +17 val_17 17 val_17 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +19 val_19 19 val_19 diff --git ql/src/test/results/clientpositive/spark/semijoin.q.out ql/src/test/results/clientpositive/spark/semijoin.q.out new file mode 100644 index 0000000..18fc837 --- /dev/null +++ ql/src/test/results/clientpositive/spark/semijoin.q.out @@ -0,0 +1,2643 @@ +PREHOOK: query: create table t1 as select cast(key as int) key, value from src where key <= 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: create table t1 as select cast(key as int) key, value from src where key <= 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: select * from t1 sort by key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select * from t1 sort by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +9 val_9 +10 val_10 +PREHOOK: query: create table t2 as select cast(2*key as int) key, value from t1 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1 +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: create table t2 as select 
cast(2*key as int) key, value from t1 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: select * from t2 sort by key +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t2 sort by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +4 val_2 +8 val_4 +10 val_5 +10 val_5 +10 val_5 +16 val_8 +18 val_9 +20 val_10 +PREHOOK: query: create table t3 as select * from (select * from t1 union all select * from t2) b +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Output: database:default +PREHOOK: Output: default@t3 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: create table t3 as select * from (select * from t1 union all select * from t2) b +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t3 +PREHOOK: query: select * from t3 sort by key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select * from t3 sort by key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +2 val_2 +4 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +8 val_4 +8 val_8 +9 val_9 +10 val_10 +10 val_5 +10 val_5 +10 val_5 +16 val_8 +18 val_9 +20 val_10 +PREHOOK: query: create table t4 (key int, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t4 +POSTHOOK: query: create table t4 (key int, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t4 +PREHOOK: query: select * from t4 +PREHOOK: type: QUERY +PREHOOK: Input: default@t4 +#### A masked pattern was here #### +POSTHOOK: query: select * from t4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t4 +#### A masked pattern was here #### +PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: 
_col0 (type: int)
+                          Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: int)
+                      sort order: +
+                      Map-reduce partition columns: key (type: int)
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      value expressions: value (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                condition expressions:
+                  0 {KEY.reducesinkkey0} {VALUE._col0}
+                  1 
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 13 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 13 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string)
+                    sort order: ++
+                    Statistics: Num rows: 13 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 13 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 13 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+4 val_4
+8 val_8
+10 val_10
+PREHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 22 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 11 Data size: 45 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: key
+                      Statistics: Num rows: 11 Data size: 45 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: key (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 11 Data size: 45 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 11 Data size: 45 Basic stats: COMPLETE Column stats: NONE
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 0 Data size: 95 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: int)
+                      sort order: +
+                      Map-reduce partition columns: key (type: int)
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      value expressions: value (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                condition expressions:
+                  0 {KEY.reducesinkkey0} {VALUE._col0}
+                  1 
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string)
+                    sort order: ++
+                    Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+4 val_2
+8 val_4
+10 val_5
+10 val_5
+10 val_5
+PREHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: key
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      Group By Operator
+                        keys: key (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: int)
+                      sort order: +
+                      Map-reduce partition columns: key (type: int)
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      value expressions: value (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                condition expressions:
+                  0 {KEY.reducesinkkey0} {VALUE._col0}
+                  1 
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string)
+                    sort order: ++
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t4
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t4
+#### A masked pattern was here ####
+PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key < 15) and key is not null) (type: boolean)
+                    Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: key
+                      Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: key (type: int), key (type: int)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col1 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col1 (type: int)
+                          Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: int)
+                      sort order: +
+                      Map-reduce partition columns: key (type: int)
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      value expressions: value (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                condition expressions:
+                  0 {VALUE._col0}
+                  1 
+                outputColumnNames: _col1
+                Statistics: Num rows: 8 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: string)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 8 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Statistics: Num rows: 8 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 8 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 8 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+val_0
+val_0
+val_0
+val_10
+val_2
+val_4
+val_5
+val_5
+val_5
+val_8
+val_9
+PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 0 Data size: 95 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: ((value < 'val_10') and key is not null) (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      Group By Operator
+                        keys: key (type: int), value (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: int)
+                      sort order: +
+                      Map-reduce partition columns: key (type: int)
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      value expressions: value (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                condition expressions:
+                  0 {KEY.reducesinkkey0} {VALUE._col0}
+                  1 
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string)
+                    sort order: ++
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+PREHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: int)
+                      sort order: +
+                      Map-reduce partition columns: key (type: int)
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      value expressions: value (type: string)
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: t3
+                  Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key > 5) and key is not null) (type: boolean)
+                    Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                condition expressions:
+                  0 {VALUE._col0}
+                  1 
+                outputColumnNames: _col1
+                Statistics: Num rows: 8 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: string)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 8 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Statistics: Num rows: 8 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 8 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 8 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+val_10
+val_8
+val_9
+PREHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  Statistics: Num rows: 0 Data size: 95 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: (((key > 5) and (value <= 'val_20')) and key is not null) (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: int), _col1 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: int)
+                      sort order: +
+                      Map-reduce partition columns: key (type: int)
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      value expressions: value (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                condition expressions:
+                  0 {VALUE._col0}
+                  1 
+                outputColumnNames: _col1
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: string)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 22 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key > 2) and key is not null) (type: boolean)
+                    Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 0 Data size: 95 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: int)
+                      sort order: +
+                      Map-reduce partition columns: key (type: int)
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      value expressions: value (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                condition expressions:
+                  0 {KEY.reducesinkkey0} {VALUE._col0}
+                  1 
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string)
+                    sort order: ++
+                    Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+4 val_2
+8 val_4
+10 val_5
+10 val_5
+10 val_5
+PREHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 22 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 11 Data size: 45 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: key
+                      Statistics: Num rows: 11 Data size: 45 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: key (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 11 Data size: 45 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 11 Data size: 45 Basic stats: COMPLETE Column stats: NONE
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: int)
+                      sort order: +
+                      Map-reduce partition columns: key (type: int)
+                      Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                condition expressions:
+                  0 {KEY.reducesinkkey0}
+                  1 
+                outputColumnNames: _col0
+                Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+0
+0
+0
+0
+0
+0
+2
+4
+4
+5
+5
+5
+8
+8
+9
+10
+10
+10
+10
+PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (2 * key) is not null (type: boolean)
+                    Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: key
+                      Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: key (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: (2 * _col0) (type: int)
+                          sort order: +
+                          Map-reduce partition columns: (2 * _col0) (type: int)
+                          Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: int)
+                      sort order: +
+                      Map-reduce partition columns: key (type: int)
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      value expressions: value (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                condition expressions:
+                  0 {KEY.reducesinkkey0} {VALUE._col0}
+                  1 
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 13 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 13 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string)
+                    sort order: ++
+                    Statistics: Num rows: 13 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 13 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 13 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+8 val_8
+PREHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 0 Data size: 95 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: int)
+                      sort order: +
+                      Map-reduce partition columns: key (type: int)
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      value expressions: value (type: string)
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: c
+                  Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: key
+                      Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: key (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: int)
+                      sort order: +
+                      Map-reduce partition columns: key (type: int)
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      value expressions: value (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                     Left Semi Join 1 to 2
+                condition expressions:
+                  0 {KEY.reducesinkkey0} {VALUE._col0}
+                  1 {KEY.reducesinkkey0} {VALUE._col0}
+                  2 
+                outputColumnNames: _col0, _col1, _col5, _col6
+                Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string)
+                    sort order: ++
+                    Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: int), _col3 (type: string)
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+0 val_0 0 val_0
+0 val_0 0 val_0
+0 val_0 0 val_0
+0 val_0 0 val_0
+0 val_0 0 val_0
+0 val_0 0 val_0
+0 val_0 0 val_0
+0 val_0 0 val_0
+0 val_0 0 val_0
+4 val_4 4 val_2
+8 val_8 8 val_4
+10 val_10 10 val_5
+10 val_10 10 val_5
+10 val_10 10 val_5
+PREHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: (key is not null and value is not null) (type: boolean)
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      Group By Operator
+                        keys: key (type: int), value (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int), _col1 (type: string)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+                          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (key is not null and value is not null) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: int), value (type: string)
+                      sort order: ++
+                      Map-reduce partition columns: key (type: int), value (type: string)
+                      Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                condition expressions:
+                  0 {KEY.reducesinkkey0} {KEY.reducesinkkey1}
+                  1 
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 203 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 203 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string)
+                    sort order: ++
+                    Statistics: Num rows: 1 Data size: 203 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 203 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 203 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+2 val_2
+4 val_4
+5 val_5
+5 val_5
+5 val_5
+8 val_8
+9 val_9
+10 val_10
+PREHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 22 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 11 Data size: 45 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: key
+                      Statistics: Num rows: 11 Data size: 45 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: key (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 11 Data size: 45 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 11 Data size: 45 Basic stats: COMPLETE Column stats: NONE
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: c
+                  Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: key
+                      Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: key (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: int)
+                      sort order: +
+                      Map-reduce partition columns: key (type: int)
+                      Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                     Left Semi Join 0 to 2
+                condition expressions:
+                  0 {KEY.reducesinkkey0}
+                  1 
+                  2 
+                outputColumnNames: _col0
+                Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+0
+0
+0
+0
+0
+0
+4
+4
+8
+8
+10
+10
+10
+10
+PREHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 22 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: key (type: int)
+                    sort order: +
+                    Map-reduce partition columns: key (type: int)
+                    Statistics: Num rows: 22 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: c
+                  Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int)
+                    outputColumnNames: key
+                    Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: key (type: int)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: key (type: int)
+                    sort order: +
+                    Map-reduce partition columns: key (type: int)
+                    Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                     Left Semi Join 1 to 2
+                condition expressions:
+                  0 {KEY.reducesinkkey0}
+                  1 
+                  2 
+                outputColumnNames: _col0
+                Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+4
+4
+8
+8
+10
+10
+10
+10
+PREHOOK: query: explain select a.key from t1 a right outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key from t1 a right outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: key (type: int)
+                    sort order: +
+                    Map-reduce partition columns: key (type: int)
+                    Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: c
+                  Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int)
+                    outputColumnNames: key
+                    Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: key (type: int)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 22 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: key (type: int)
+                    sort order: +
+                    Map-reduce partition columns: key (type: int)
+                    Statistics: Num rows: 22 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Right Outer Join0 to 1
+                     Left Semi Join 1 to 2
+                condition expressions:
+                  0 {KEY.reducesinkkey0}
+                  1 
+                  2 
+                outputColumnNames: _col0
+                Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a.key from t1 a right outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key from t1 a right outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+NULL
+NULL
+NULL
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+4
+4
+8
+8
+10
+10
+10
+10
+PREHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: key (type: int)
+                    sort order: +
+                    Map-reduce partition columns: key (type: int)
+                    Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: c
+                  Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int)
+                    outputColumnNames: key
+                    Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: key (type: int)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 22 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: key (type: int)
+                    sort order: +
+                    Map-reduce partition columns: key (type: int)
+                    Statistics: Num rows: 22 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Outer Join 0 to 1
+                     Left Semi Join 1 to 2
+                condition expressions:
+                  0 {KEY.reducesinkkey0}
+                  1 
+                  2 
+                outputColumnNames: _col0
+                Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+NULL
+NULL
+NULL
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+4
+4
+8
+8
+10
+10
+10
+10
+PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int)
+                    outputColumnNames: key
+                    Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: key (type: int)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: c
+                  Statistics: Num rows: 22 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: key (type: int)
+                    sort order: +
+                    Map-reduce partition columns: key (type: int)
+                    Statistics: Num rows: 22 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: key (type: int)
+                    sort order: +
+                    Map-reduce partition columns: key (type: int)
+                    Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                     Left Outer Join0 to 2
+                condition expressions:
+                  0 {KEY.reducesinkkey0}
+                  1 
+                  2 
+                outputColumnNames: _col0
+                Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+4
+4
+8
+8
+10
+10
+10
+10
+16
+18
+20
+PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int)
+                    outputColumnNames: key
+                    Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: key (type: int)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: c
+                  Statistics: Num rows: 22 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: key (type: int)
+                    sort order: +
+                    Map-reduce partition columns: key (type: int)
+                    Statistics: Num rows: 22 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: key (type: int)
+                    sort order: +
+                    Map-reduce partition columns: key (type: int)
+                    Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                     Right Outer Join0 to 2
+                condition expressions:
+                  0 {KEY.reducesinkkey0}
+                  1 
+                  2 
+                outputColumnNames: _col0
+                Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+NULL
+NULL
+NULL
+NULL
+NULL
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+4
+4
+8
+8
+10
+10
+10
+10
+PREHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 22 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int)
+                    outputColumnNames: key
+                    Statistics: Num rows: 22 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: key (type: int)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 22 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 22 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: c
+                  Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: key (type: int)
+                    sort order: +
+                    Map-reduce partition columns: key (type: int)
+                    Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: key (type: int)
+                    sort order: +
+                    Map-reduce partition columns: key (type: int)
+                    Statistics: Num rows: 46 Data size: 185 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                     Outer Join 0 to 2
+                condition expressions:
+                  0 {KEY.reducesinkkey0}
+                  1 
+                  2 
+                outputColumnNames: _col0
+                Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE
Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 407 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +NULL +NULL +NULL +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +2 +4 +4 +5 +5 +5 +8 +8 +9 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 23 Data size: 95 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE 
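-- [Editorial note, not part of the golden output] Unlike the plans above,
-- this query joins on two different columns: a.key = b.key for the semi join
-- and a.value = c.value for the outer join. Joins on distinct keys cannot
-- share one shuffle, so the Edges wire up a reducer per key (Reducer 2 on
-- key, Reducer 3 on value) plus Reducer 4 for the final sort; when every join
-- shares the same key, a single Join Operator in one reducer consumes all
-- inputs. The contrast, as an illustrative sketch only:
--
--   -- one join key -> one shuffle, one Join Operator:
--   SELECT a.key FROM t3 a LEFT SEMI JOIN t2 b ON a.key = b.key
--     LEFT OUTER JOIN t1 c ON a.key = c.key SORT BY a.key;
--   -- two join keys -> one shuffle per key, as in this plan:
--   SELECT a.key FROM t3 a LEFT SEMI JOIN t2 b ON a.key = b.key
--     LEFT OUTER JOIN t1 c ON a.value = c.value SORT BY a.key;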
+ Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 13 Data size: 53 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 58 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 58 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 14 Data size: 58 Basic stats: COMPLETE Column stats: NONE + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 58 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 58 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +8 +8 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +16 +18 +20 +PREHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 95 Basic 
stats: PARTIAL Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value is not null and (key > 100)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out new file mode 100644 index 0000000..d45cdd3 --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out @@ -0,0 +1,155 @@ +PREHOOK: query: explain +create table noskew as select a.* from src a join src b on a.key=b.key order by a.key limit 30 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain +create table noskew as select a.* from src a join src b on a.key=b.key order by a.key limit 30 +POSTHOOK: type: CREATETABLE_AS_SELECT +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + 
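-- [Editorial note, not part of the golden output] This golden file exercises
-- the runtime skew join with data that turns out not to be skewed. The
-- settings below are an assumption for illustration (they live in
-- skewjoin_noskew.q, not in this output); tests of this kind typically
-- enable:
--
--   SET hive.optimize.skewjoin = true;  -- split skewed keys out at runtime
--   SET hive.skewjoin.key = 2;          -- assumed threshold: keys with more rows are treated as skewed
--
-- Since no conditional skew-handling stages end up being needed, the plan
-- here is a plain common join: both table scans shuffle on key into
-- Reducer 2, and Reducer 3 applies the order by / limit for the CTAS.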
alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 30 + Statistics: Num rows: 30 Data size: 300 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 300 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.noskew + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: key string, value string + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.noskew + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: create table noskew as select a.* from src a join src b on a.key=b.key order by a.key limit 30 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@noskew +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: create table noskew as select a.* from src a join src b on a.key=b.key order by a.key limit 30 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@noskew +PREHOOK: query: select * from noskew +PREHOOK: type: QUERY +PREHOOK: Input: default@noskew +#### A masked pattern was here #### +POSTHOOK: query: select * from noskew +POSTHOOK: type: QUERY +POSTHOOK: Input: default@noskew +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +114 val_114 diff --git ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out new file mode 100644 index 0000000..c3c95cd --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out @@ -0,0 +1,474 @@ +PREHOOK: query: -- This is to test the union->selectstar->filesink and skewjoin optimization +-- Union of 2 map-reduce subqueries is performed for the skew join +-- There is no need to write the temporary results of the sub-queries, and then read them +-- again to process the union. The union can be removed completely. +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- Since this test creates sub-directories for the output, it might be easier to run the test +-- only on hadoop 23 + +CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: -- This is to test the union->selectstar->filesink and skewjoin optimization +-- Union of 2 map-reduce subqueries is performed for the skew join +-- There is no need to write the temporary results of the sub-queries, and then read them +-- again to process the union. The union can be removed completely. 
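-- [Editorial note, not part of the golden output] The compile-time skew join
-- rewrite described in this header splits a join on a table declared
-- SKEWED BY (key) ON ((2)) into one branch for the skewed key and one branch
-- for the remaining keys, then unions the branches. Roughly, as an
-- illustrative sketch only (the optimizer rewrites the operator tree, not
-- the SQL text):
--
--   SELECT * FROM T1 a JOIN T2 b ON a.key = b.key WHERE a.key = '2'
--   UNION ALL
--   SELECT * FROM T1 a JOIN T2 b ON a.key = b.key WHERE a.key <> '2';
--
-- With union removal enabled (hive.optimize.union.remove=true), each branch
-- writes straight into a sub-directory of the final output instead of
-- materializing a temporary union input, which is why the header restricts
-- the test to hadoop 23.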
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- Since this test creates sub-directories for the output, it might be easier to run the test +-- only on hadoop 23 + +CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((3)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((3)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: -- a simple join query with skew on both the tables on the join key + +EXPLAIN +SELECT * FROM T1 a JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- a simple join query with skew on both the tables on the join key + +EXPLAIN +SELECT * FROM T1 a JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 
0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +2 12 2 22 +3 13 3 13 +8 18 8 18 +8 18 8 18 +8 28 8 18 +8 28 8 18 +PREHOOK: query: -- test outer joins also + +EXPLAIN +SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- test outer joins also + +EXPLAIN +SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON 
a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +NULL NULL 4 14 +NULL NULL 5 15 +2 12 2 22 +3 13 3 13 +8 18 8 18 +8 18 8 18 +8 28 8 18 +8 28 8 18 +PREHOOK: query: create table DEST1(key1 STRING, val1 STRING, key2 STRING, val2 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST1 +POSTHOOK: query: create table DEST1(key1 STRING, val1 STRING, key2 STRING, val2 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST1 +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE DEST1 +SELECT * FROM T1 a JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE DEST1 +SELECT * FROM T1 a JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + +PREHOOK: query: INSERT OVERWRITE TABLE DEST1 +SELECT * FROM T1 a JOIN T2 b ON a.key = 
b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Output: default@dest1 +POSTHOOK: query: INSERT OVERWRITE TABLE DEST1 +SELECT * FROM T1 a JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.key1 SIMPLE [(t1)a.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.key2 SIMPLE [(t2)b.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val1 SIMPLE [(t1)a.FieldSchema(name:val, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val2 SIMPLE [(t2)b.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: SELECT * FROM DEST1 +ORDER BY key1, key2, val1, val2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM DEST1 +ORDER BY key1, key2, val1, val2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +2 12 2 22 +3 13 3 13 +8 18 8 18 +8 18 8 18 +8 28 8 18 +8 28 8 18 +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE DEST1 +SELECT * FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE DEST1 +SELECT * FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + +PREHOOK: query: INSERT OVERWRITE TABLE DEST1 +SELECT * FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Output: default@dest1 +POSTHOOK: query: INSERT OVERWRITE TABLE DEST1 +SELECT * FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.key1 SIMPLE [(t1)a.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.key2 SIMPLE [(t2)b.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val1 SIMPLE [(t1)a.FieldSchema(name:val, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val2 SIMPLE [(t2)b.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: SELECT * FROM DEST1 +ORDER BY key1, key2, val1, val2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM DEST1 +ORDER BY key1, key2, val1, val2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +NULL NULL 4 14 +NULL NULL 5 15 +2 12 2 22 +3 13 3 13 +8 18 8 18 +8 18 8 18 +8 28 8 18 +8 28 8 18 diff --git ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out new file mode 100644 index 0000000..010e4d4 --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out @@ -0,0 +1,172 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key) ON ((2), (8)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key) ON ((2), (8)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +POSTHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: LOAD DATA LOCAL 
INPATH '../../data/files/T3.txt' INTO TABLE T3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t3 +PREHOOK: query: -- This is to test the union->selectstar->filesink and skewjoin optimization +-- Union of 3 map-reduce subqueries is performed for the skew join +-- There is no need to write the temporary results of the sub-queries, and then read them +-- again to process the union. The union can be removed completely. +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- Since this test creates sub-directories for the output table, it might be easier +-- to run the test only on hadoop 23 + +EXPLAIN +SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key +PREHOOK: type: QUERY +POSTHOOK: query: -- This is to test the union->selectstar->filesink and skewjoin optimization +-- Union of 3 map-reduce subqueries is performed for the skew join +-- There is no need to write the temporary results of the sub-queries, and then read them +-- again to process the union. The union can be removed completely. +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) +-- Since this test creates sub-directories for the output table, it might be easier +-- to run the test only on hadoop 23 + +EXPLAIN +SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 
1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key +ORDER BY a.key, b.key, c.key, a.val, b.val, c.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key +ORDER BY a.key, b.key, c.key, a.val, b.val, c.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +2 12 2 22 2 12 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out new file mode 100644 index 0000000..47ebe96 --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out @@ -0,0 +1,422 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((3)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((3)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: -- a simple join query with skew on both the tables on the join key +-- adding a order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- a simple join query with skew on 
both the tables on the join key +-- adding a order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +2 12 2 22 +3 13 3 13 +8 18 8 18 +8 18 8 18 +8 28 8 18 +8 28 8 18 +PREHOOK: query: -- test outer joins also + +EXPLAIN +SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- test outer joins also + +EXPLAIN +SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator 
Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +NULL NULL 4 14 +NULL NULL 5 15 +2 12 2 22 +3 13 3 13 +8 18 8 18 +8 18 8 18 +8 28 8 18 +8 28 8 18 +PREHOOK: query: -- an aggregation at the end should not change anything + +EXPLAIN +SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- an aggregation at the end should not change anything + +EXPLAIN +SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + 
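-- [Editorial note, not part of the golden output] The key is not null Filter
-- Operators in this plan and the inner-join plans above are a
-- null-elimination optimization: a NULL key can never satisfy a.key = b.key,
-- so an inner join may drop such rows at the scan. The RIGHT OUTER JOIN
-- plans in this file carry no such filter, since an outer join must keep
-- rows of the preserved side even when the key is NULL or unmatched. The
-- test's own results show the difference:
--
--   SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key;             -- 6
--   SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key; -- 8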
predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +6 +PREHOOK: query: EXPLAIN +SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + Select Operator + 
Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +8 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out new file mode 100644 index 0000000..16c1ec1 --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out @@ -0,0 +1,170 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: drop table array_valued_T1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table array_valued_T1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table array_valued_T1 (key string, value array<string>) SKEWED BY (key) ON ((8)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@array_valued_T1 +POSTHOOK: query: create table array_valued_T1 (key string, value array<string>) SKEWED BY (key) ON ((8)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@array_valued_T1 +PREHOOK: query: insert overwrite table array_valued_T1 select key, array(value) from T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@array_valued_t1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table array_valued_T1 select key, array(value) from T1 +POSTHOOK: type:
QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@array_valued_t1 +POSTHOOK: Lineage: array_valued_t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: array_valued_t1.value EXPRESSION [(t1)t1.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: -- This test is to verify the skew join compile optimization when the join is followed by a lateral view +-- adding a order by at the end to make the results deterministic + +explain +select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val +PREHOOK: type: QUERY +POSTHOOK: query: -- This test is to verify the skew join compile optimization when the join is followed by a lateral view +-- adding a order by at the end to make the results deterministic + +explain +select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: array) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col6 (type: array) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + SELECT * : (no compute) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col1 (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + UDTF Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val +ORDER BY key, val +PREHOOK: type: QUERY +PREHOOK: Input: default@array_valued_t1 +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val +ORDER BY key, val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@array_valued_t1 +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 ["11"] 11 +2 ["12"] 12 +3 ["13"] 13 +7 ["17"] 17 +8 ["18"] 18 +8 ["18"] 18 +8 ["28"] 28 +8 ["28"] 28 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out new file mode 100644 index 0000000..c7cbb33 --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out @@ -0,0 +1,214 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: 
Output: default@t2 +PREHOOK: query: -- This test is to verify the skew join compile optimization when the join is followed +-- by a union. Both sides of a union consist of a join, which should have used +-- skew join compile time optimization. +-- adding an order by at the end to make the results deterministic + +EXPLAIN +select * from +( + select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key + union all + select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key +) subq1 +PREHOOK: type: QUERY +POSTHOOK: query: -- This test is to verify the skew join compile optimization when the join is followed +-- by a union. Both sides of a union consist of a join, which should have used +-- skew join compile time optimization. +-- adding an order by at the end to make the results deterministic + +EXPLAIN +select * from +( + select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key + union all + select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key +) subq1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val 
(type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from +( + select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key + union all + select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key +) subq1 +ORDER BY key, val1, val2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from +( + select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key + union all + select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key +) subq1 +ORDER BY key, val1, val2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +2 12 22 +2 12 22 +3 13 13 +3 13 13 +8 18 18 +8 18 18 +8 18 18 +8 18 18 +8 28 18 +8 28 18 +8 28 18 +8 28 18 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out new file mode 100644 index 0000000..dc0ed2a --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out @@ -0,0 +1,130 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key, val) ON ((2, 12), (8, 18)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key, val) ON ((2, 12), (8, 18)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key, val) ON ((3, 13), 
(8, 18)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key, val) ON ((3, 13), (8, 18)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: -- Both the join tables are skewed by 2 keys, and one of the skewed values +-- is common to both the tables. The join key matches the skewed key set. +-- adding a order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +PREHOOK: type: QUERY +POSTHOOK: query: -- Both the join tables are skewed by 2 keys, and one of the skewed values +-- is common to both the tables. The join key matches the skewed key set. +-- adding a order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and val is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and val is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +3 13 3 13 +8 18 8 18 +8 18 8 18 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt13.q.out ql/src/test/results/clientpositive/spark/skewjoinopt13.q.out new file mode 100644 index 0000000..c002d5e --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoinopt13.q.out @@ -0,0 +1,195 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: CREATE TABLE T3(key STRING, val STRING) +SKEWED BY (val) ON ((12)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +POSTHOOK: query: CREATE TABLE T3(key STRING, val STRING) +SKEWED BY (val) ON ((12)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t3 +PREHOOK: query: -- This test is for skewed join compile time optimization for more than 2 tables. +-- The join key for table 3 is different from the join key used for joining +-- tables 1 and 2. 
Table 3 is skewed, but since one of the join sources for table +-- 3 consist of a sub-query which contains a join, the compile time skew join +-- optimization is not performed +-- adding a order by at the end to make the results deterministic + +EXPLAIN +select * +from +T1 a join T2 b on a.key = b.key +join T3 c on a.val = c.val +PREHOOK: type: QUERY +POSTHOOK: query: -- This test is for skewed join compile time optimization for more than 2 tables. +-- The join key for table 3 is different from the join key used for joining +-- tables 1 and 2. Table 3 is skewed, but since one of the join sources for table +-- 3 consist of a sub-query which contains a join, the compile time skew join +-- optimization is not performed +-- adding a order by at the end to make the results deterministic + +EXPLAIN +select * +from +T1 a join T2 b on a.key = b.key +join T3 c on a.val = c.val +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: val is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and val is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + 
condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col4} {VALUE._col5} + 1 {VALUE._col0} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from +T1 a join T2 b on a.key = b.key +join T3 c on a.val = c.val +order by a.key, b.key, c.key, a.val, b.val, c.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select * +from +T1 a join T2 b on a.key = b.key +join T3 c on a.val = c.val +order by a.key, b.key, c.key, a.val, b.val, c.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +2 12 2 22 2 12 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out new file mode 100644 index 0000000..1374536 --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out @@ -0,0 +1,199 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: CREATE TABLE T3(key STRING, val STRING) +SKEWED BY (val) ON ((12)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +POSTHOOK: query: CREATE TABLE T3(key STRING, val STRING) +SKEWED BY (val) ON ((12)) STORED AS TEXTFILE 
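For orientation, the skewjoinopt* files above and below all share one setup pattern. The following is a minimal, illustrative sketch of that pattern, not text from the tests themselves: the table name skew_demo is hypothetical, and the SET line assumes the compile-time skew join flag that the corresponding skewjoinopt*.q files enable.

    set hive.optimize.skewjoin.compiletime = true;  -- compile-time skew join rewrite
    CREATE TABLE skew_demo (key STRING, val STRING)
      SKEWED BY (key) ON ((2), (8))                 -- declare known hot key values
      STORED AS TEXTFILE;
    -- SKEWED BY is metadata only: it records hot values so the planner can split
    -- a join on key into a skewed branch and a residual branch at compile time.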
+POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t3 +PREHOOK: query: -- This test is for skewed join compile time optimization for more than 2 tables. +-- The join key for table 3 is different from the join key used for joining +-- tables 1 and 2. Tables 1 and 3 are skewed. Since one of the join sources for table +-- 3 consist of a sub-query which contains a join, the compile time skew join +-- optimization is not enabled for table 3, but it is used for the first join between +-- tables 1 and 2 +-- adding a order by at the end to make the results deterministic + +EXPLAIN +select * +from +T1 a join T2 b on a.key = b.key +join T3 c on a.val = c.val +PREHOOK: type: QUERY +POSTHOOK: query: -- This test is for skewed join compile time optimization for more than 2 tables. +-- The join key for table 3 is different from the join key used for joining +-- tables 1 and 2. Tables 1 and 3 are skewed. Since one of the join sources for table +-- 3 consist of a sub-query which contains a join, the compile time skew join +-- optimization is not enabled for table 3, but it is used for the first join between +-- tables 1 and 2 +-- adding a order by at the end to make the results deterministic + +EXPLAIN +select * +from +T1 a join T2 b on a.key = b.key +join T3 c on a.val = c.val +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: val is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and val is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 
Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col4} {VALUE._col5} + 1 {VALUE._col0} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from +T1 a join T2 b on a.key = b.key +join T3 c on a.val = c.val +order by a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select * +from +T1 a join T2 b on a.key = b.key +join T3 c on a.val = c.val +order by a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +2 12 2 22 2 12 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out new file mode 100644 index 0000000..e197185 --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out @@ -0,0 +1,464 @@ +PREHOOK: query: CREATE TABLE tmpT1(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmpT1 +POSTHOOK: query: CREATE TABLE tmpT1(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmpT1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE tmpT1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tmpt1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE tmpT1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tmpt1 +PREHOOK: query: -- testing skew on other data types - int +CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: -- testing skew on other 
data types - int +CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: INSERT OVERWRITE TABLE T1 SELECT key, val FROM tmpT1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tmpt1 +PREHOOK: Output: default@t1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE T1 SELECT key, val FROM tmpT1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmpt1 +POSTHOOK: Output: default@t1 +POSTHOOK: Lineage: t1.key EXPRESSION [(tmpt1)tmpt1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t1.val SIMPLE [(tmpt1)tmpt1.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE tmpT2(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmpT2 +POSTHOOK: query: CREATE TABLE tmpT2(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmpT2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE tmpT2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tmpt2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE tmpT2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tmpt2 +PREHOOK: query: CREATE TABLE T2(key INT, val STRING) SKEWED BY (key) ON ((3)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key INT, val STRING) SKEWED BY (key) ON ((3)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: INSERT OVERWRITE TABLE T2 SELECT key, val FROM tmpT2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tmpt2 +PREHOOK: Output: default@t2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE T2 SELECT key, val FROM tmpT2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmpt2 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.key EXPRESSION [(tmpt2)tmpt2.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t2.val SIMPLE [(tmpt2)tmpt2.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: -- The skewed key is a integer column. +-- Otherwise this test is similar to skewjoinopt1.q +-- Both the joined tables are skewed, and the joined column +-- is an integer +-- adding a order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The skewed key is a integer column. 
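Since the .q.out plans only show the final operator trees, a conceptual sketch of what the compile-time optimization amounts to may help here. Assuming T1 is skewed on key = 2 and T2 on key = 3, as in this test, an inner join on key can be split into two branches over disjoint key sets; this is an illustration of the idea, not the literal rewrite Hive emits.

    -- illustrative split of: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
    SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
    WHERE a.key IN (2, 3)        -- hot keys get a branch of their own
    UNION ALL
    SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
    WHERE a.key NOT IN (2, 3);   -- residual branch for everything else
    -- The branches partition rows by join key, so their union equals the original
    -- inner join while no single reducer has to absorb all of the hot rows.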
+-- Otherwise this test is similar to skewjoinopt1.q +-- Both the joined tables are skewed, and the joined column +-- is an integer +-- adding a order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +2 12 2 22 +3 13 3 13 +8 18 8 18 +8 18 8 18 +8 28 8 18 +8 28 8 18 +PREHOOK: query: -- test outer joins also + +EXPLAIN +SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- test outer joins also + +EXPLAIN +SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL 
SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +NULL NULL 4 14 +NULL NULL 5 15 +2 12 2 22 +3 13 3 13 +8 18 8 18 +8 18 8 18 +8 28 8 18 +8 28 8 18 +PREHOOK: query: -- an aggregation at the end should not change anything + +EXPLAIN +SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- an aggregation at the end should not change anything + +EXPLAIN +SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 7 Data size: 
30 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 4 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 4 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +6 +PREHOOK: query: EXPLAIN +SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 7 Data 
size: 33 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 7 Data size: 33 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +8 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out new file mode 100644 index 0000000..42c307b --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out @@ -0,0 +1,130 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key, val) ON ((2, 12)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key, val) ON ((2, 12)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((3)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((3)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is +-- skewed by one column. 
Ths join is performed on the both the columns +-- adding a order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +PREHOOK: type: QUERY +POSTHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is +-- skewed by one column. Ths join is performed on the both the columns +-- adding a order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and val is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and val is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +3 13 3 13 +8 18 8 18 +8 18 8 18 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out new file mode 100644 index 
0000000..4d11d91 --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out @@ -0,0 +1,285 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key, val) ON ((2, 12)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key, val) ON ((2, 12)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is +-- skewed by one column. Ths join is performed on the first skewed column +-- The skewed value for the jon key is common to both the tables. +-- In this case, the skewed join value is not repeated in the filter. +-- adding a order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is +-- skewed by one column. Ths join is performed on the first skewed column +-- The skewed value for the jon key is common to both the tables. +-- In this case, the skewed join value is not repeated in the filter. 
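A hedged gloss on the test comment above: T1 is skewed on the pair (key, val) = (2, 12), T2 on key = 2, and the join uses key alone, so both skew declarations contribute the same hot value and the skewed-branch predicate needs to mention 2 only once. Illustrative branch shapes, not the literal plan:

    SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key WHERE a.key = 2;   -- skewed branch
    SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key WHERE a.key <> 2;  -- residual branch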
+-- adding a order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +2 12 2 22 +3 13 3 13 +8 18 8 18 +8 18 8 18 +8 28 8 18 +8 28 8 18 +PREHOOK: query: DROP TABLE T1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: DROP TABLE T1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 +PREHOOK: query: DROP TABLE T2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: DROP TABLE T2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t2 +POSTHOOK: Output: default@t2 +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key, val) ON ((2, 12)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: 
Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key, val) ON ((2, 12)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is +-- skewed by one column. Ths join is performed on the both the columns +-- In this case, the skewed join value is repeated in the filter. + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +PREHOOK: type: QUERY +POSTHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is +-- skewed by one column. Ths join is performed on the both the columns +-- In this case, the skewed join value is repeated in the filter. 
+ +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and val is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and val is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val +ORDER BY a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +3 13 3 13 +8 18 8 18 +8 18 8 18 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt18.q.out ql/src/test/results/clientpositive/spark/skewjoinopt18.q.out new file mode 100644 index 0000000..f615dfa --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoinopt18.q.out @@ -0,0 +1,162 @@ +PREHOOK: query: CREATE TABLE tmpT1(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmpT1 +POSTHOOK: query: CREATE TABLE tmpT1(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default 
+POSTHOOK: Output: default@tmpT1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE tmpT1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tmpt1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE tmpT1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tmpt1 +PREHOOK: query: -- testing skew on other data types - int +CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: -- testing skew on other data types - int +CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: INSERT OVERWRITE TABLE T1 SELECT key, val FROM tmpT1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tmpt1 +PREHOOK: Output: default@t1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE T1 SELECT key, val FROM tmpT1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmpt1 +POSTHOOK: Output: default@t1 +POSTHOOK: Lineage: t1.key EXPRESSION [(tmpt1)tmpt1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t1.val SIMPLE [(tmpt1)tmpt1.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: -- The skewed column is the same in both the tables, however it is +-- INT in one of the tables, and STRING in the other table + +CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((3)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: -- The skewed column is the same in both the tables, however it is +-- INT in one of the tables, and STRING in the other table + +CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((3)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: -- Once HIVE-3445 is fixed, the compile time skew join optimization would be +-- applicable here. Till the above jira is fixed, it would be performed as a +-- regular join +-- adding an order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Once HIVE-3445 is fixed, the compile time skew join optimization would be +-- applicable here.
Till the above jira is fixed, it would be performed as a +-- regular join +-- adding an order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: int), val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +2 12 2 22 +3 13 3 13 +8 18 8 18 +8 18 8 18 +8 28 8 18 +8 28 8 18 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out new file mode 100644 index 0000000..b636bfb --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out @@ -0,0 +1,137 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) +CLUSTERED BY (key) INTO 4 BUCKETS +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) +CLUSTERED BY (key) INTO 4 BUCKETS +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: -- add a test where the skewed key is also the bucketized key +-- it should not matter, and the compile time skewed join +-- optimization is performed +-- adding an order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- add a test where the skewed key is also the bucketized key +-- it should not matter, and the compile time skewed join +-- optimization is performed +-- adding an order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic
stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +2 12 2 22 +3 13 3 13 +8 18 8 18 +8 18 8 18 +8 28 8 18 +8 28 8 18 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out new file mode 100644 index 0000000..91be6c9 --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out @@ -0,0 +1,137 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: -- add a test where the skewed key is also the bucketized/sorted key +-- it should not matter, and the compile time skewed join +-- optimization is performed +-- adding an order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- add a test where the skewed key is also the bucketized/sorted key +-- it should not matter, and the compile time skewed join +-- optimization is performed +-- adding an order by at the end to make the results deterministic + +EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +2 12 2 22 +3 13 3 13 +8 18 8 18 +8 18 8 18 +8 28 8 18 +8 28 8 18 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out new file mode 100644 index 0000000..46d3557 --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out @@ -0,0 +1,230 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key) ON ((2), (8)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key) ON ((2), (8)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD 
DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: -- a simple query with skew on both the tables. One of the skewed +-- values is common to both the tables. The skewed value should not be +-- repeated in the filter. +-- adding an order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- a simple query with skew on both the tables. One of the skewed +-- values is common to both the tables. The skewed value should not be +-- repeated in the filter. +-- adding an order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data
size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +2 12 2 22 +3 13 3 13 +8 18 8 18 +8 18 8 18 +8 28 8 18 +8 28 8 18 +PREHOOK: query: -- test outer joins also + +EXPLAIN +SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- test outer joins also + +EXPLAIN +SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON 
a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +NULL NULL 4 14 +NULL NULL 5 15 +1 11 NULL NULL +2 12 2 22 +3 13 3 13 +7 17 NULL NULL +8 18 8 18 +8 18 8 18 +8 28 8 18 +8 28 8 18 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out new file mode 100644 index 0000000..8b042ef --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out @@ -0,0 +1,226 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: -- only one of the tables of the join (the left table of the join) is skewed +-- the skewed filter would still be applied to both the tables +-- adding an order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- only one of the tables of the join (the left table of the join) is skewed +-- the skewed filter would still be applied to both the tables +-- adding an order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key
is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +2 12 2 22 +3 13 3 13 +8 18 8 18 +8 18 8 18 +8 28 8 18 +8 28 8 18 +PREHOOK: query: -- the order of the join should not matter, just confirming +EXPLAIN +SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- the order of the join should not matter, just confirming +EXPLAIN +SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + 
condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +2 22 2 12 +3 13 3 13 +8 18 8 18 +8 18 8 18 +8 18 8 28 +8 18 8 28 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out new file mode 100644 index 0000000..064a86d --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out @@ -0,0 +1,135 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key, val) ON ((2, 12)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key, val) ON ((2, 12)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((3)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((3)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is +-- skewed by one column. 
Ths join is performed on the first skewed column +-- adding a order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is +-- skewed by one column. Ths join is performed on the first skewed column +-- adding a order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +2 12 2 22 +3 13 3 13 +8 18 8 18 +8 18 8 18 +8 28 8 18 +8 28 8 18 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out new file mode 100644 index 0000000..cff5eec --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out 
@@ -0,0 +1,137 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key, val) ON ((2, 12), (8, 18)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key, val) ON ((2, 12), (8, 18)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key, val) ON ((3, 13), (8, 18)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key, val) ON ((3, 13), (8, 18)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: -- Both the join tables are skewed by 2 keys, and one of the skewed values +-- is common to both the tables. The join key is a subset of the skewed key set: +-- it only contains the first skewed key for both the tables +-- adding a order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Both the join tables are skewed by 2 keys, and one of the skewed values +-- is common to both the tables. 
The join key is a subset of the skewed key set: +-- it only contains the first skewed key for both the tables +-- adding a order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key +ORDER BY a.key, b.key, a.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +2 12 2 22 +3 13 3 13 +8 18 8 18 +8 18 8 18 +8 28 8 18 +8 28 8 18 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out new file mode 100644 index 0000000..b99e5d4 --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out @@ -0,0 +1,166 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key) ON ((2), (8)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE 
T1(key STRING, val STRING) +SKEWED BY (key) ON ((2), (8)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +POSTHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t3 +PREHOOK: query: -- This test is for validating skewed join compile time optimization for more than +-- 2 tables. The join key is the same, and so a 3-way join would be performed. +-- 2 of the 3 tables are skewed on the join key +-- adding a order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key +PREHOOK: type: QUERY +POSTHOOK: query: -- This test is for validating skewed join compile time optimization for more than +-- 2 tables. The join key is the same, and so a 3-way join would be performed. 
+-- 2 of the 3 tables are skewed on the join key +-- adding a order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key +ORDER BY a.key, b.key, c.key, a.val, b.val, c.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON 
a.key = b.key JOIN T3 c on a.key = c.key +ORDER BY a.key, b.key, c.key, a.val, b.val, c.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +2 12 2 22 2 12 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out new file mode 100644 index 0000000..07d6b61 --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out @@ -0,0 +1,164 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) +SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +POSTHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t3 +PREHOOK: query: -- This test is for validating skewed join compile time optimization for more than +-- 2 tables. The join key is the same, and so a 3-way join would be performed. +-- 1 of the 3 tables are skewed on the join key +-- adding a order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key +PREHOOK: type: QUERY +POSTHOOK: query: -- This test is for validating skewed join compile time optimization for more than +-- 2 tables. The join key is the same, and so a 3-way join would be performed. 
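For reference, the rewrite exercised by these skewjoinopt golden files is driven entirely by table-level skew metadata declared at DDL time, not by runtime skew counters. A minimal sketch of how such a run is set up — the SET line is an assumption about the .q file headers, which golden outputs do not echo, while the DDL mirrors the CREATE TABLE statements recorded above:

  -- Assumed session setting (not visible in the golden output):
  SET hive.optimize.skewjoin.compiletime = true;

  -- Hot join keys are declared in the table definition; the compiler
  -- reads this metadata when planning joins against T2.key.
  CREATE TABLE T2 (key STRING, val STRING)
  SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE;

  -- When the rewrite fires, the join is compiled into one join over the
  -- declared hot keys and one over the remaining keys, unioned together,
  -- so the hot keys do not all land on a single reducer.
  EXPLAIN
  SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key;

Whether the rewrite actually fires depends on the conditions the test comments spell out; the multi-table plan recorded in this file still shows a single three-way shuffle join (Reducer 2 fed by three map vertices).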
+-- 1 of the 3 tables are skewed on the join key +-- adding a order by at the end to make the results deterministic + +EXPLAIN +SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key +ORDER BY a.key, b.key, c.key, a.val, b.val, c.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON 
a.key = b.key JOIN T3 c on a.key = c.key +ORDER BY a.key, b.key, c.key, a.val, b.val, c.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +2 12 2 22 2 12 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out new file mode 100644 index 0000000..8188487 --- /dev/null +++ ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out @@ -0,0 +1,316 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) +SKEWED BY (key) ON ((2)) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: -- no skew join compile time optimization would be performed if one of the +-- join sources is a sub-query consisting of a union all +-- adding a order by at the end to make the results deterministic +EXPLAIN +select * from +( +select key, val from T1 + union all +select key, val from T1 +) subq1 +join T2 b on subq1.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- no skew join compile time optimization would be performed if one of the +-- join sources is a sub-query consisting of a union all +-- adding a order by at the end to make the results deterministic +EXPLAIN +select * from +( +select key, val from T1 + union all +select key, val from T1 +) subq1 +join T2 b on subq1.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Union 4 (GROUP PARTITION-LEVEL SORT, 1) + Union 4 <- Map 3 (NONE, 0), Map 5 (NONE, 0) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 3 + Map Operator Tree: + TableScan + 
alias: t1 + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: t1 + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 4 + Vertex: Union 4 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from +( +select key, val from T1 + union all +select key, val from T1 +) subq1 +join T2 b on subq1.key = b.key +ORDER BY subq1.key, b.key, subq1.val, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from +( +select key, val from T1 + union all +select key, val from T1 +) subq1 +join T2 b on subq1.key = b.key +ORDER BY subq1.key, b.key, subq1.val, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +2 12 2 22 +2 12 2 22 +3 13 3 13 +3 13 3 13 +8 18 8 18 +8 18 8 18 +8 18 8 18 +8 18 8 18 +8 28 8 18 +8 28 8 18 +8 28 8 18 +8 28 8 18 +PREHOOK: query: -- no skew join compile time optimization would be performed if one of the +-- join sources is a sub-query consisting of a group by +EXPLAIN +select * from +( +select key, count(1) as cnt from T1 group by key +) subq1 +join T2 b on subq1.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- no skew join compile time optimization would be performed if one of the +-- join sources is a sub-query consisting of a group by +EXPLAIN +select * from +( +select key, count(1) as cnt from T1 group by key +) subq1 +join T2 b on subq1.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: 
boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from +( +select key, count(1) as cnt from T1 group by key +) subq1 +join T2 b on subq1.key = b.key +ORDER BY subq1.key, b.key, subq1.cnt, b.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from +( +select key, count(1) as cnt from T1 group by key +) subq1 +join T2 b on subq1.key = b.key +ORDER BY subq1.key, b.key, subq1.cnt, b.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +2 1 2 22 +3 1 3 13 +8 2 8 18 +8 2 8 18 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out 
ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out new file mode 100644 index 0000000..43ea4cd --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out @@ -0,0 +1,510 @@ +PREHOOK: query: create table hive_test_smb_bucket1 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hive_test_smb_bucket1 +POSTHOOK: query: create table hive_test_smb_bucket1 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hive_test_smb_bucket1 +PREHOOK: query: create table hive_test_smb_bucket2 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hive_test_smb_bucket2 +POSTHOOK: query: create table hive_test_smb_bucket2 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hive_test_smb_bucket2 +PREHOOK: query: -- empty partitions (HIVE-3205) +explain extended +SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2 +FROM hive_test_smb_bucket1 a JOIN +hive_test_smb_bucket2 b +ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL +PREHOOK: type: QUERY +POSTHOOK: query: -- empty partitions (HIVE-3205) +explain extended +SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2 +FROM hive_test_smb_bucket1 a JOIN +hive_test_smb_bucket2 b +ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + hive_test_smb_bucket1 + a + TOK_TABREF + TOK_TABNAME + hive_test_smb_bucket2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + k1 + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + ds + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + k2 + TOK_WHERE + and + and + = + . + TOK_TABLE_OR_COL + a + ds + '2010-10-15' + = + . + TOK_TABLE_OR_COL + b + ds + '2010-10-15' + TOK_FUNCTION + TOK_ISNOTNULL + . 
+ TOK_TABLE_OR_COL + b + key + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map 3 + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col6 (type: int), _col7 (type: string), '2010-10-15' (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types int:string:string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2 +FROM hive_test_smb_bucket1 a JOIN +hive_test_smb_bucket2 b +ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@hive_test_smb_bucket1 +PREHOOK: Input: default@hive_test_smb_bucket2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2 +FROM hive_test_smb_bucket1 a JOIN +hive_test_smb_bucket2 b +ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hive_test_smb_bucket1 +POSTHOOK: Input: default@hive_test_smb_bucket2 +#### A masked pattern was here #### +PREHOOK: query: explain extended +SELECT /* + MAPJOIN(a) */ b.key as k1, b.value, b.ds, a.key as k2 +FROM hive_test_smb_bucket1 a JOIN +hive_test_smb_bucket2 b +ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +SELECT /* + MAPJOIN(a) */ b.key as k1, b.value, b.ds, a.key as k2 +FROM hive_test_smb_bucket1 a JOIN +hive_test_smb_bucket2 b +ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + hive_test_smb_bucket1 + a + TOK_TABREF + TOK_TABNAME + hive_test_smb_bucket2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + a + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + k1 + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + value + TOK_SELEXPR + . 
+ TOK_TABLE_OR_COL + b + ds + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + key + k2 + TOK_WHERE + and + and + = + . + TOK_TABLE_OR_COL + a + ds + '2010-10-15' + = + . + TOK_TABLE_OR_COL + b + ds + '2010-10-15' + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + b + key + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map 3 + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col6 (type: int), _col7 (type: string), '2010-10-15' (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types int:string:string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /* + MAPJOIN(a) */ b.key as k1, b.value, b.ds, a.key as k2 +FROM hive_test_smb_bucket1 a JOIN +hive_test_smb_bucket2 b +ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@hive_test_smb_bucket1 +PREHOOK: Input: default@hive_test_smb_bucket2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /* + MAPJOIN(a) */ b.key as k1, b.value, b.ds, a.key as k2 +FROM hive_test_smb_bucket1 a JOIN +hive_test_smb_bucket2 b +ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hive_test_smb_bucket1 +POSTHOOK: Input: default@hive_test_smb_bucket2 +#### A masked pattern was here #### +PREHOOK: query: insert overwrite table hive_test_smb_bucket1 partition (ds='2010-10-15') select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@hive_test_smb_bucket1@ds=2010-10-15 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table hive_test_smb_bucket1 partition (ds='2010-10-15') select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@hive_test_smb_bucket1@ds=2010-10-15 +POSTHOOK: Lineage: hive_test_smb_bucket1 PARTITION(ds=2010-10-15).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hive_test_smb_bucket1 
PARTITION(ds=2010-10-15).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table hive_test_smb_bucket2 partition (ds='2010-10-15') select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@hive_test_smb_bucket2@ds=2010-10-15 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table hive_test_smb_bucket2 partition (ds='2010-10-15') select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@hive_test_smb_bucket2@ds=2010-10-15 +POSTHOOK: Lineage: hive_test_smb_bucket2 PARTITION(ds=2010-10-15).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hive_test_smb_bucket2 PARTITION(ds=2010-10-15).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain +create table smb_mapjoin9_results as +SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2 +FROM hive_test_smb_bucket1 a JOIN +hive_test_smb_bucket2 b +ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain +create table smb_mapjoin9_results as +SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2 +FROM hive_test_smb_bucket1 a JOIN +hive_test_smb_bucket2 b +ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL +POSTHOOK: type: CREATETABLE_AS_SELECT +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string), ds (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col6, _col7, _col8 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col6 (type: int), _col7 (type: string), _col8 (type: string), _col0 (type: int) + 
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_mapjoin9_results + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: k1 int, value string, ds string, k2 int + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_mapjoin9_results + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: create table smb_mapjoin9_results as +SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2 +FROM hive_test_smb_bucket1 a JOIN +hive_test_smb_bucket2 b +ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@hive_test_smb_bucket1 +PREHOOK: Input: default@hive_test_smb_bucket1@ds=2010-10-15 +PREHOOK: Input: default@hive_test_smb_bucket2 +PREHOOK: Input: default@hive_test_smb_bucket2@ds=2010-10-15 +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_mapjoin9_results +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: create table smb_mapjoin9_results as +SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2 +FROM hive_test_smb_bucket1 a JOIN +hive_test_smb_bucket2 b +ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@hive_test_smb_bucket1 +POSTHOOK: Input: default@hive_test_smb_bucket1@ds=2010-10-15 +POSTHOOK: Input: default@hive_test_smb_bucket2 +POSTHOOK: Input: default@hive_test_smb_bucket2@ds=2010-10-15 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_mapjoin9_results +PREHOOK: query: drop table smb_mapjoin9_results +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@smb_mapjoin9_results +PREHOOK: Output: default@smb_mapjoin9_results +POSTHOOK: query: drop table smb_mapjoin9_results +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@smb_mapjoin9_results +POSTHOOK: Output: default@smb_mapjoin9_results +PREHOOK: query: drop table hive_test_smb_bucket1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hive_test_smb_bucket1 +PREHOOK: Output: default@hive_test_smb_bucket1 +POSTHOOK: query: drop table hive_test_smb_bucket1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hive_test_smb_bucket1 +POSTHOOK: Output: default@hive_test_smb_bucket1 +PREHOOK: query: drop table hive_test_smb_bucket2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hive_test_smb_bucket2 +PREHOOK: Output: default@hive_test_smb_bucket2 +POSTHOOK: query: drop table hive_test_smb_bucket2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hive_test_smb_bucket2 +POSTHOOK: Output: default@hive_test_smb_bucket2 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out 
ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out new file mode 100644 index 0000000..bfde76c --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out @@ -0,0 +1,712 @@ +PREHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket_1 +POSTHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket_1 +PREHOOK: query: create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket_2 +POSTHOOK: query: create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket_2 +PREHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket_3 +POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket_3 +PREHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Filter Operator + 
predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +#### A masked pattern was here #### +PREHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + 
Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +#### A masked pattern was here #### +1 val_1 NULL NULL +3 val_3 NULL NULL +4 val_4 NULL NULL +5 val_5 NULL NULL +10 val_10 NULL NULL +PREHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +#### A masked pattern was here #### +NULL NULL 20 val_20 +NULL NULL 23 val_23 +NULL NULL 25 val_25 +NULL NULL 30 val_30 +PREHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +#### A masked pattern was 
here #### +POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +#### A masked pattern was here #### +1 val_1 NULL NULL +3 val_3 NULL NULL +4 val_4 NULL NULL +5 val_5 NULL NULL +10 val_10 NULL NULL +NULL NULL 20 val_20 +NULL NULL 23 val_23 +NULL NULL 25 val_25 +NULL NULL 30 val_30 +PREHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +#### A masked pattern was here #### +PREHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a left 
outer join smb_bucket_2 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +#### A masked pattern was here #### +1 val_1 NULL NULL +3 val_3 NULL NULL +4 val_4 NULL NULL +5 val_5 NULL NULL +10 val_10 NULL NULL +PREHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 
key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +#### A masked pattern was here #### +NULL NULL 20 val_20 +NULL NULL 23 val_23 +NULL NULL 25 val_25 +NULL NULL 30 val_30 +PREHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 
+ condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +#### A masked pattern was here #### +1 val_1 NULL NULL +3 val_3 NULL NULL +4 val_4 NULL NULL +5 val_5 NULL NULL +10 val_10 NULL NULL +NULL NULL 20 val_20 +NULL NULL 23 val_23 +NULL NULL 25 val_25 +NULL NULL 30 val_30 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out new file mode 100644 index 0000000..341a8c3 --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out @@ -0,0 +1,141 @@ +PREHOOK: query: create table tmp_smb_bucket_10(userid int, pageid int, postid int, type string) partitioned by (ds string) CLUSTERED BY (userid) SORTED BY (pageid, postid, type, userid) INTO 2 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tmp_smb_bucket_10 +POSTHOOK: query: create table tmp_smb_bucket_10(userid int, pageid int, postid int, type string) partitioned by (ds string) CLUSTERED BY (userid) SORTED BY (pageid, postid, type, userid) INTO 2 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tmp_smb_bucket_10 +PREHOOK: query: alter table tmp_smb_bucket_10 add partition (ds = '1') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@tmp_smb_bucket_10 +POSTHOOK: query: alter table tmp_smb_bucket_10 add partition (ds = '1') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@tmp_smb_bucket_10 +POSTHOOK: Output: default@tmp_smb_bucket_10@ds=1 +PREHOOK: query: alter table tmp_smb_bucket_10 add partition (ds = '2') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@tmp_smb_bucket_10 +POSTHOOK: query: alter table tmp_smb_bucket_10 add partition (ds = '2') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@tmp_smb_bucket_10 +POSTHOOK: Output: default@tmp_smb_bucket_10@ds=2 +PREHOOK: query: -- add dummy files to make sure that the number of files in each partition is the same as the number of buckets + +load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tmp_smb_bucket_10@ds=1 +POSTHOOK:
query: -- add dummy files to make sure that the number of files in each partition is the same as the number of buckets + +load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tmp_smb_bucket_10@ds=1 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tmp_smb_bucket_10@ds=1 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tmp_smb_bucket_10@ds=1 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tmp_smb_bucket_10@ds=2 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tmp_smb_bucket_10@ds=2 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tmp_smb_bucket_10@ds=2 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tmp_smb_bucket_10@ds=2 +PREHOOK: query: explain +select /*+mapjoin(a)*/ * from tmp_smb_bucket_10 a join tmp_smb_bucket_10 b +on (a.ds = '1' and b.ds = '2' and + a.userid = b.userid and + a.pageid = b.pageid and + a.postid = b.postid and + a.type = b.type) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a)*/ * from tmp_smb_bucket_10 a join tmp_smb_bucket_10 b +on (a.ds = '1' and b.ds = '2' and + a.userid = b.userid and + a.pageid = b.pageid and + a.postid = b.postid and + a.type = b.type) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 3 Data size: 414 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((userid is not null and pageid is not null) and postid is not null) and type is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: userid (type: int), pageid (type: int), postid (type: int), type (type: string) + sort order: ++++ + Map-reduce partition columns: userid (type: int), pageid (type: int), postid (type: int), type (type: string) + Statistics: Num rows: 1 Data size: 138 Basic stats: COMPLETE Column stats: NONE + value expressions: ds (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 414 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((userid is not null and pageid is not null) and postid is not null) and type is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 138 Basic stats:
COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: userid (type: int), pageid (type: int), postid (type: int), type (type: string) + sort order: ++++ + Map-reduce partition columns: userid (type: int), pageid (type: int), postid (type: int), type (type: string) + Statistics: Num rows: 1 Data size: 138 Basic stats: COMPLETE Column stats: NONE + value expressions: ds (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {KEY.reducesinkkey2} {KEY.reducesinkkey3} {VALUE._col0} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {KEY.reducesinkkey2} {KEY.reducesinkkey3} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col11 (type: string), _col12 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out new file mode 100644 index 0000000..cad4063 --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out @@ -0,0 +1,1699 @@ +PREHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl1 +POSTHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl1 +PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl2 +POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl2 +PREHOOK: query: insert overwrite table tbl1 +select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tbl1 +select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl1 +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table 
tbl2 +select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tbl2 +select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl2 +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- The mapjoin is being performed as part of sub-query. It should be converted to a sort-merge join +explain +select count(*) from ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +PREHOOK: type: QUERY +POSTHOOK: query: -- The mapjoin is being performed as part of sub-query. It should be converted to a sort-merge join +explain +select count(*) from ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + 
File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +22 +PREHOOK: query: -- The mapjoin is being performed as part of sub-query. It should be converted to a sort-merge join +-- Add an order by at the end to make the results deterministic. +explain +select key, count(*) from +( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +group by key +order by key +PREHOOK: type: QUERY +POSTHOOK: query: -- The mapjoin is being performed as part of sub-query. It should be converted to a sort-merge join +-- Add an order by at the end to make the results deterministic. +explain +select key, count(*) from +( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +group by key +order by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0
(type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from +( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +group by key +order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from +( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +group by key +order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +0 9 +2 1 +4 1 +5 9 +8 1 +9 1 +PREHOOK: query: -- The mapjoin is being performed as part of more than one sub-query. It should be converted to a sort-merge join +explain +select count(*) from +( + select key, count(*) from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +PREHOOK: type: QUERY +POSTHOOK: query: -- The mapjoin is being performed as part of more than one sub-query. 
It should be converted to a sort-merge join +explain +select count(*) from +( + select key, count(*) from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +( + select key, count(*) from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +( + select key, count(*) from + ( + select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +6 +PREHOOK: query: -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join. +explain +select /*+mapjoin(subq1)*/ count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join. +explain +select /*+mapjoin(subq1)*/ count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + 
Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(subq1)*/ count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(subq1)*/ count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join, although there is more than one level of sub-query +explain +select /*+mapjoin(subq2)*/ count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The subquery itself is being map-joined. 
Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join, although there is more than one level of sub-query +explain +select /*+mapjoin(subq2)*/ count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(subq2)*/ count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 
+PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(subq2)*/ count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: -- Both the big table and the small table are nested sub-queries, i.e. more than 1 level of sub-query. +-- The join should be converted to a sort-merge join +explain +select /*+mapjoin(subq2)*/ count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Both the big table and the small table are nested sub-queries, i.e. more than 1 level of sub-query. +-- The join should be converted to a sort-merge join +explain +select /*+mapjoin(subq2)*/ count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data
size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(subq2)*/ count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(subq2)*/ count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +#### A masked pattern was here #### +20 +PREHOOK: query: -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters and the join key +-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one +-- item, but that is not part of the join key. +explain +select /*+mapjoin(subq1)*/ count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters and the join key +-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one +-- item, but that is not part of the join key. 
+explain +select /*+mapjoin(subq1)*/ count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(subq1)*/ count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from 
tbl2 a where key < 8) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(subq1)*/ count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join nor bucketized map-side +-- join should be performed +explain +select /*+mapjoin(subq1)*/ count(*) from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join nor bucketized map-side +-- join should be performed +explain +select /*+mapjoin(subq1)*/ count(*) from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows:
1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(subq1)*/ count(*) from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(subq1)*/ count(*) from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +22 +PREHOOK: query: -- The small table is a sub-query and the big table is not. +-- It should be converted to a sort-merge join. +explain +select /*+mapjoin(subq1)*/ count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The small table is a sub-query and the big table is not. +-- It should be converted to a sort-merge join. 
+explain +select /*+mapjoin(subq1)*/ count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(subq1)*/ count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(subq1)*/ count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: -- The big table is a sub-query and the small table is not. +-- It should be converted to a sort-merge join. +explain +select /*+mapjoin(a)*/ count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The big table is a sub-query and the small table is not. +-- It should be converted to a sort-merge join. +explain +select /*+mapjoin(a)*/ count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + 
ListSink + +PREHOOK: query: select /*+mapjoin(a)*/ count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a)*/ count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. +-- It should be converted to a sort-merge join +explain +select /*+mapjoin(subq1, subq2)*/ count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on (subq1.key = subq2.key) + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +PREHOOK: type: QUERY +POSTHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. +-- It should be converted to a sort-merge join +explain +select /*+mapjoin(subq1, subq2)*/ count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on (subq1.key = subq2.key) + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic
stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 + 1 + 2 + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(subq1, subq2)*/ count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(subq1, subq2)*/ count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +56 +PREHOOK: query: -- The mapjoin is being performed on a nested sub-query, and an aggregation is performed after that. +-- The join should be converted to a sort-merge join +explain +select count(*) from ( + select /*+mapjoin(subq2)*/ subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a +PREHOOK: type: QUERY +POSTHOOK: query: -- The mapjoin is being performed on a nested sub-query, and an aggregation is performed after that. 
+-- The join should be converted to a sort-merge join +explain +select count(*) from ( + select /*+mapjoin(subq2)*/ subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from ( + select /*+mapjoin(subq2)*/ subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a 
+PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from ( + select /*+mapjoin(subq2)*/ subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out new file mode 100644 index 0000000..482268c --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out @@ -0,0 +1,1118 @@ +PREHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table1 +POSTHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 +PREHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table3 +POSTHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table3 +PREHOOK: query: CREATE TABLE test_table4 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table4 +POSTHOOK: query: CREATE TABLE test_table4 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table4 +PREHOOK: query: CREATE TABLE test_table5 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table5 +POSTHOOK: query: CREATE TABLE test_table5 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table5 +PREHOOK: query: CREATE TABLE test_table6 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table6 +POSTHOOK: query: CREATE TABLE test_table6 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table6 +PREHOOK: query: CREATE TABLE 
test_table7 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table7 +POSTHOOK: query: CREATE TABLE test_table7 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table7 +PREHOOK: query: CREATE TABLE test_table8 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table8 +POSTHOOK: query: CREATE TABLE test_table8 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table8 +PREHOOK: query: INSERT OVERWRITE TABLE test_table1 +SELECT * FROM src WHERE key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE test_table1 +SELECT * FROM src WHERE key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE test_table2 +SELECT * FROM src WHERE key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 +SELECT * FROM src WHERE key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE test_table3 +SELECT * FROM src WHERE key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table3 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 +SELECT * FROM src WHERE key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table3 +POSTHOOK: Lineage: test_table3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE test_table4 +SELECT * FROM src WHERE key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table4 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE test_table4 +SELECT * FROM src WHERE key < 10 +POSTHOOK: type: 
QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table4 +POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE test_table5 +SELECT * FROM src WHERE key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table5 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE test_table5 +SELECT * FROM src WHERE key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table5 +POSTHOOK: Lineage: test_table5.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE test_table6 +SELECT * FROM src WHERE key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table6 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE test_table6 +SELECT * FROM src WHERE key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table6 +POSTHOOK: Lineage: test_table6.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table6.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE test_table7 +SELECT * FROM src WHERE key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table7 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE test_table7 +SELECT * FROM src WHERE key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table7 +POSTHOOK: Lineage: test_table7.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table7.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE test_table8 +SELECT * FROM src WHERE key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table8 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE test_table8 +SELECT * FROM src WHERE key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table8 +POSTHOOK: Lineage: test_table8.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table8.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Mapjoin followed by an aggregation should be performed in a single MR job up to 7 tables +EXPLAIN +SELECT /*+ mapjoin(b, c, d, e, f, g) */ count(*) +FROM test_table1 a JOIN test_table2 b ON a.key = b.key +JOIN test_table3 c ON a.key
= c.key +JOIN test_table4 d ON a.key = d.key +JOIN test_table5 e ON a.key = e.key +JOIN test_table6 f ON a.key = f.key +JOIN test_table7 g ON a.key = g.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Mapjoin followed by an aggregation should be performed in a single MR job up to 7 tables +EXPLAIN +SELECT /*+ mapjoin(b, c, d, e, f, g) */ count(*) +FROM test_table1 a JOIN test_table2 b ON a.key = b.key +JOIN test_table3 c ON a.key = c.key +JOIN test_table4 d ON a.key = d.key +JOIN test_table5 e ON a.key = e.key +JOIN test_table6 f ON a.key = f.key +JOIN test_table7 g ON a.key = g.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1), Map 8 (GROUP PARTITION-LEVEL SORT, 1), Map 9 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: g + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 8 + Map
Operator Tree: + TableScan + alias: c + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 9 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Inner Join 0 to 3 + Inner Join 0 to 4 + Inner Join 0 to 5 + Inner Join 0 to 6 + condition expressions: + 0 + 1 + 2 + 3 + 4 + 5 + 6 + Statistics: Num rows: 66 Data size: 264 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 66 Data size: 264 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ mapjoin(b, c, d, e, f, g) */ count(*) +FROM test_table1 a JOIN test_table2 b ON a.key = b.key +JOIN test_table3 c ON a.key = c.key +JOIN test_table4 d ON a.key = d.key +JOIN test_table5 e ON a.key = e.key +JOIN test_table6 f ON a.key = f.key +JOIN test_table7 g ON a.key = g.key +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table3 +PREHOOK: Input: default@test_table4 +PREHOOK: Input: default@test_table5 +PREHOOK: Input: default@test_table6 +PREHOOK: Input: default@test_table7 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ mapjoin(b, c, d, e, f, g) */ count(*) +FROM test_table1 a JOIN test_table2 b ON a.key = b.key +JOIN test_table3 c ON a.key = c.key +JOIN test_table4 d ON a.key = d.key +JOIN test_table5 e ON a.key = e.key +JOIN test_table6 f ON a.key = f.key +JOIN test_table7 g ON a.key = g.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: 
default@test_table3 +POSTHOOK: Input: default@test_table4 +POSTHOOK: Input: default@test_table5 +POSTHOOK: Input: default@test_table6 +POSTHOOK: Input: default@test_table7 +#### A masked pattern was here #### +4378 +PREHOOK: query: -- It should be automatically converted to a sort-merge join followed by a groupby in +-- a single MR job +EXPLAIN +SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +PREHOOK: type: QUERY +POSTHOOK: query: -- It should be automatically converted to a sort-merge join followed by a groupby in +-- a single MR job +EXPLAIN +SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1), Map 8 (GROUP PARTITION-LEVEL SORT, 1), Map 9 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: g + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key 
expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 9 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + Left Outer Join0 to 3 + Left Outer Join0 to 4 + Left Outer Join0 to 5 + Left Outer Join0 to 6 + condition expressions: + 0 + 1 + 2 + 3 + 4 + 5 + 6 + Statistics: Num rows: 132 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 132 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table3 +PREHOOK: Input: default@test_table4 +PREHOOK: Input: default@test_table5 +PREHOOK: Input: default@test_table6 +PREHOOK: Input: default@test_table7 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table3 +POSTHOOK: Input: default@test_table4 +POSTHOOK: Input: default@test_table5 +POSTHOOK: Input: default@test_table6 +POSTHOOK: Input: default@test_table7 +#### A masked pattern was here #### +4378 +PREHOOK: query: EXPLAIN +SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON 
a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +LEFT OUTER JOIN test_table8 h ON a.key = h.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +LEFT OUTER JOIN test_table8 h ON a.key = h.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 10 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1), Map 8 (GROUP PARTITION-LEVEL SORT, 1), Map 9 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 10 + Map Operator Tree: + TableScan + alias: h + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: g + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + 
Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 9 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + Left Outer Join0 to 3 + Left Outer Join0 to 4 + Left Outer Join0 to 5 + Left Outer Join0 to 6 + Left Outer Join0 to 7 + condition expressions: + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + Statistics: Num rows: 154 Data size: 616 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 154 Data size: 616 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +LEFT OUTER JOIN test_table8 h ON a.key = h.key +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table3 +PREHOOK: Input: default@test_table4 +PREHOOK: Input: default@test_table5 +PREHOOK: Input: default@test_table6 +PREHOOK: Input: default@test_table7 +PREHOOK: Input: default@test_table8 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +LEFT OUTER JOIN test_table8 h ON a.key = h.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table3 +POSTHOOK: Input: default@test_table4 +POSTHOOK: Input: default@test_table5 +POSTHOOK: Input: default@test_table6 +POSTHOOK: Input: default@test_table7 +POSTHOOK: Input: default@test_table8 +#### A masked pattern was here #### +13126 +PREHOOK: query: -- outer join with max 16 
aliases +EXPLAIN +SELECT a.* +FROM test_table1 a +LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +LEFT OUTER JOIN test_table8 h ON a.key = h.key +LEFT OUTER JOIN test_table4 i ON a.key = i.key +LEFT OUTER JOIN test_table5 j ON a.key = j.key +LEFT OUTER JOIN test_table6 k ON a.key = k.key +LEFT OUTER JOIN test_table7 l ON a.key = l.key +LEFT OUTER JOIN test_table8 m ON a.key = m.key +LEFT OUTER JOIN test_table7 n ON a.key = n.key +LEFT OUTER JOIN test_table8 o ON a.key = o.key +LEFT OUTER JOIN test_table4 p ON a.key = p.key +LEFT OUTER JOIN test_table5 q ON a.key = q.key +LEFT OUTER JOIN test_table6 r ON a.key = r.key +LEFT OUTER JOIN test_table7 s ON a.key = s.key +LEFT OUTER JOIN test_table8 t ON a.key = t.key +PREHOOK: type: QUERY +POSTHOOK: query: -- outer join with max 16 aliases +EXPLAIN +SELECT a.* +FROM test_table1 a +LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +LEFT OUTER JOIN test_table8 h ON a.key = h.key +LEFT OUTER JOIN test_table4 i ON a.key = i.key +LEFT OUTER JOIN test_table5 j ON a.key = j.key +LEFT OUTER JOIN test_table6 k ON a.key = k.key +LEFT OUTER JOIN test_table7 l ON a.key = l.key +LEFT OUTER JOIN test_table8 m ON a.key = m.key +LEFT OUTER JOIN test_table7 n ON a.key = n.key +LEFT OUTER JOIN test_table8 o ON a.key = o.key +LEFT OUTER JOIN test_table4 p ON a.key = p.key +LEFT OUTER JOIN test_table5 q ON a.key = q.key +LEFT OUTER JOIN test_table6 r ON a.key = r.key +LEFT OUTER JOIN test_table7 s ON a.key = s.key +LEFT OUTER JOIN test_table8 t ON a.key = t.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 10 (GROUP PARTITION-LEVEL SORT, 1), Map 11 (GROUP PARTITION-LEVEL SORT, 1), Map 12 (GROUP PARTITION-LEVEL SORT, 1), Map 13 (GROUP PARTITION-LEVEL SORT, 1), Map 14 (GROUP PARTITION-LEVEL SORT, 1), Map 15 (GROUP PARTITION-LEVEL SORT, 1), Map 16 (GROUP PARTITION-LEVEL SORT, 1), Map 17 (GROUP PARTITION-LEVEL SORT, 1), Map 22 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 6 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1), Map 8 (GROUP PARTITION-LEVEL SORT, 1), Map 9 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 18 (GROUP PARTITION-LEVEL SORT, 1), Map 19 (GROUP PARTITION-LEVEL SORT, 1), Map 20 (GROUP PARTITION-LEVEL SORT, 1), Map 21 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 10 + Map Operator Tree: + TableScan + alias: n + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 11 + Map Operator Tree: + TableScan + alias: o + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 12 + Map Operator Tree: + TableScan + alias: l + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 13 + Map Operator Tree: + TableScan + alias: m + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 14 + Map Operator Tree: + TableScan + alias: j + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 15 + Map Operator Tree: + TableScan + alias: k + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 16 + Map Operator Tree: + TableScan + alias: h + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 17 + Map Operator Tree: + TableScan + alias: i + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 18 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 19 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 20 + Map Operator Tree: + TableScan + alias: r + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data 
size: 80 Basic stats: COMPLETE Column stats: NONE + Map 21 + Map Operator Tree: + TableScan + alias: q + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 22 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: g + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map 9 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + Left Outer Join0 to 3 + Left Outer Join0 to 4 + Left Outer Join0 to 5 + Left Outer Join0 to 6 + Left Outer Join0 to 7 + Left Outer Join0 to 8 + Left Outer Join0 to 9 + Left Outer Join0 to 10 + Left Outer Join0 to 11 + Left Outer Join0 to 12 + Left Outer Join0 to 13 + Left Outer Join0 to 14 + Left Outer Join0 to 15 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 330 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 330 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + Left Outer Join0 to 3 + Left Outer Join0 to 4 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + 2 + 3 + 4 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1452 Data size: 5808 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1452 Data size: 5808 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1452 Data size: 5808 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_18.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_18.q.out new file mode 100644 index 0000000..e06a6ff --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_18.q.out @@ -0,0 +1,468 @@ +PREHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table1 +POSTHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1@ds=1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1@ds=1 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM 
test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +PREHOOK: Output: default@test_table2@ds=1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +POSTHOOK: Output: default@test_table2@ds=1 +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(*) from test_table1 where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +500 +PREHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +247 +PREHOOK: query: select 
count(*) from test_table1 where ds = '1' and hash(key) % 2 = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 2 = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +253 +PREHOOK: query: select count(*) from test_table1 tablesample (bucket 1 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 1 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +247 +PREHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +253 +PREHOOK: query: select count(*) from test_table2 where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +500 +PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +247 +PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +253 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +247 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '1' 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +253 +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation, one of the buckets should be empty + +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238 +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation, one of the buckets should be empty + +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key = 238) (type: boolean) + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +PREHOOK: Output: default@test_table2@ds=2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +POSTHOOK: Output: default@test_table2@ds=2 +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(*) from test_table2 where ds = '2' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from 
test_table2 where ds = '2' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=2 +#### A masked pattern was here #### +2 +PREHOOK: query: select count(*) from test_table2 where ds = '2' and hash(key) % 2 = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '2' and hash(key) % 2 = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=2 +#### A masked pattern was here #### +2 +PREHOOK: query: select count(*) from test_table2 where ds = '2' and hash(key) % 2 = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '2' and hash(key) % 2 = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=2 +#### A masked pattern was here #### +0 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '2' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '2' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=2 +#### A masked pattern was here #### +2 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '2' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '2' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=2 +#### A masked pattern was here #### +0 +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '3') +SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '3') +SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 3 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') +SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=2 +PREHOOK: Output: default@test_table2@ds=2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') +SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=2 +POSTHOOK: Output: default@test_table2@ds=2 +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key SIMPLE [(test_table2)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(test_table2)a.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(*) from test_table2 where ds = '3' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '3' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +0 +PREHOOK: query: select count(*) from test_table2 where ds = '3' and hash(key) % 2 = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '3' and hash(key) % 2 = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +0 +PREHOOK: query: select count(*) from test_table2 where ds = '3' and hash(key) % 2 = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '3' and hash(key) % 2 = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +0 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '3' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 2) s where ds = '3' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +0 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '3' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s where ds = '3' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +0 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_19.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_19.q.out new file mode 100644 index 0000000..868d891 --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_19.q.out @@ -0,0 +1,263 @@ +PREHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: 
database:default +PREHOOK: Output: default@test_table1 +POSTHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1@ds=1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1@ds=1 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: 
query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +PREHOOK: Output: default@test_table2@ds=1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +POSTHOOK: Output: default@test_table2@ds=1 +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(*) from test_table1 where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +500 +PREHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +36 +PREHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +40 +PREHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 12 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 where ds = '1' and hash(key) % 16 = 12 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +29 +PREHOOK: query: select count(*) from test_table1 tablesample (bucket 1 out of 16) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 1 out of 16) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +36 +PREHOOK: query: select count(*) from test_table1 tablesample (bucket 6 out of 16) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 
tablesample (bucket 6 out of 16) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +40 +PREHOOK: query: select count(*) from test_table1 tablesample (bucket 13 out of 16) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 13 out of 16) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +#### A masked pattern was here #### +29 +PREHOOK: query: select count(*) from test_table2 where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +500 +PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +36 +PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +40 +PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 12 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 16 = 12 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +29 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 16) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 1 out of 16) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +36 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 6 out of 16) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 6 out of 16) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +40 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 13 out of 16) s where ds = '1' 
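-- A hedged reading of the paired counts above (an editorial aside, not golden
-- output): because the insert into test_table2 compiled map-only, each mapper
-- copied one source bucket straight into the matching target bucket, so the
-- per-bucket counts of the two tables line up (36, 40, 29 for buckets 1, 6,
-- 13 out of 16 on both sides). One matching pair, for illustration:
select count(*) from test_table1 tablesample (bucket 6 out of 16) s where ds = '1'; -- 40
select count(*) from test_table2 tablesample (bucket 6 out of 16) s where ds = '1'; -- 40 as well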
+PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 13 out of 16) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +29 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out new file mode 100644 index 0000000..b7e6715 --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out @@ -0,0 +1,720 @@ +PREHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket_1 +POSTHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket_1 +PREHOOK: query: create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket_2 +POSTHOOK: query: create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket_2 +PREHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket_3 +POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket_3 +PREHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_3 b on a.key = 
b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +4 val_4 4 val_4 +10 val_10 10 val_10 +PREHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +1 val_1 NULL NULL +3 val_3 NULL NULL +4 val_4 4 val_4 +5 val_5 NULL NULL +10 val_10 10 val_10 +PREHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join 
Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +4 val_4 4 val_4 +10 val_10 10 val_10 +NULL NULL 17 val_17 +NULL NULL 19 val_19 +NULL NULL 20 val_20 +NULL NULL 23 val_23 +PREHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column 
stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_1 a full outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +1 val_1 NULL NULL +3 val_3 NULL NULL +4 val_4 4 val_4 +5 val_5 NULL NULL +10 val_10 10 val_10 +NULL NULL 17 val_17 +NULL NULL 19 val_19 +NULL NULL 20 val_20 +NULL NULL 23 val_23 +PREHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select 
/*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +4 val_4 4 val_4 +10 val_10 10 val_10 +PREHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +1 val_1 NULL NULL +3 val_3 NULL NULL +4 val_4 4 val_4 +5 val_5 NULL NULL +10 val_10 10 val_10 +PREHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select 
/*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +4 val_4 4 val_4 +10 val_10 10 val_10 +NULL NULL 17 val_17 +NULL NULL 19 val_19 +NULL NULL 20 val_20 +NULL NULL 23 val_23 +PREHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key 
(type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_1 a full outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +1 val_1 NULL NULL +3 val_3 NULL NULL +4 val_4 4 val_4 +5 val_5 NULL NULL +10 val_10 10 val_10 +NULL NULL 17 val_17 +NULL NULL 19 val_19 +NULL NULL 20 val_20 +NULL NULL 23 val_23 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out new file mode 100644 index 0000000..292f596 --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out @@ -0,0 +1,366 @@ +PREHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key int, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table1 +POSTHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key int, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key STRING, value1 STRING, value2 string) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (key STRING, value1 STRING, value2 string) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 
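One pattern worth flagging in the smb_mapjoin_2.q.out plans above: every query carries a /*+mapjoin(...)*/ hint over one-bucket, key-sorted RCFile tables, yet each Spark plan still shows a shuffle join (a Reducer 2 vertex fed by two map vertices over GROUP PARTITION-LEVEL SORT edges) rather than a map-side sort-merge-bucket join. The settings below are the conventional preamble such smb_mapjoin tests use to request the SMB path; they are an assumption here, since the .q files themselves are not part of this diff:

set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
-- SMB conversion also needs both join sides bucketed and sorted on the join
-- key with compatible bucket counts, which the create table statements above
-- (clustered by (key) sorted by (key) into 1 buckets) satisfy.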
+PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1@ds=1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1@ds=1 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- with different datatypes. This should be a map-reduce operation +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- with different datatypes. This should be a map-reduce operation +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToString(_col0) (type: string) + sort order: + + Map-reduce partition columns: UDFToString(_col0) (type: string) + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 
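-- A short gloss on the explain plan above (an editorial aside with assumed
-- behavior, not golden output): test_table2 is clustered by a string key
-- while the source key is int, so the source bucketing cannot be reused; the
-- plan shuffles on UDFToString(_col0) and the single reducer re-buckets rows
-- by the cast value. hash() of the cast string and of the int may disagree:
select hash(cast(238 as string)) % 2, hash(238) % 2 from src limit 1;
-- When key types and bucket positions match, the same insert compiles
-- map-only with no Reducer vertex, as in the surrounding plans.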
+PREHOOK: Output: default@test_table2@ds=1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +POSTHOOK: Output: default@test_table2@ds=1 +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(*) from test_table2 where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +500 +PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +242 +PREHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 where ds = '1' and hash(key) % 2 = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table2@ds=1 +#### A masked pattern was here #### +258 +PREHOOK: query: CREATE TABLE test_table3 (key STRING, value1 int, value2 string) PARTITIONED BY (ds STRING) +CLUSTERED BY (value1) SORTED BY (value1) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table3 +POSTHOOK: query: CREATE TABLE test_table3 (key STRING, value1 int, value2 string) PARTITIONED BY (ds STRING) +CLUSTERED BY (value1) SORTED BY (value1) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table3 +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation, although the bucketing positions dont match +EXPLAIN +INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') +SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation, although the bucketing positions dont match +EXPLAIN +INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') +SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on 
stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: int), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') +SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table1@ds=1 +PREHOOK: Output: default@test_table3@ds=1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') +SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table1@ds=1 +POSTHOOK: Output: default@test_table3@ds=1 +POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value1 SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value2 SIMPLE [(test_table1)a.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(*) from test_table3 where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table3 +PREHOOK: Input: default@test_table3@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table3 where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table3 +POSTHOOK: Input: default@test_table3@ds=1 +#### A masked pattern was here #### +500 +PREHOOK: query: select count(*) from test_table3 where ds = '1' and hash(value1) % 2 = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table3 +PREHOOK: Input: default@test_table3@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table3 where ds = '1' and hash(value1) % 2 = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table3 +POSTHOOK: Input: default@test_table3@ds=1 +#### A masked pattern was here #### +247 +PREHOOK: query: select count(*) from test_table3 where ds = '1' and hash(value1) % 2 = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table3 +PREHOOK: Input: default@test_table3@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table3 where ds 
= '1' and hash(value1) % 2 = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table3 +POSTHOOK: Input: default@test_table3@ds=1 +#### A masked pattern was here #### +253 +PREHOOK: query: select count(*) from test_table3 tablesample (bucket 1 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table3 +PREHOOK: Input: default@test_table3@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table3 tablesample (bucket 1 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table3 +POSTHOOK: Input: default@test_table3@ds=1 +#### A masked pattern was here #### +247 +PREHOOK: query: select count(*) from test_table3 tablesample (bucket 2 out of 2) s where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table3 +PREHOOK: Input: default@test_table3@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table3 tablesample (bucket 2 out of 2) s where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table3 +POSTHOOK: Input: default@test_table3@ds=1 +#### A masked pattern was here #### +253 +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- However, since an expression is being selected, it should involve a reducer +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') +SELECT a.key+a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- However, since an expression is being selected, it should involve a reducer +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') +SELECT a.key+a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (key + key) (type: int), value (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToString(_col0) (type: string) + sort order: + + Map-reduce partition columns: UDFToString(_col0) (type: string) + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-3 + Stats-Aggr Operator + diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out new file mode 100644 index 0000000..8bc5dd6 --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out @@ -0,0 +1,538 @@ +PREHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table1 +POSTHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1@ds=1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1@ds=1 +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 
55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: drop table test_table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table2 +PREHOOK: Output: default@test_table2 +POSTHOOK: query: drop table test_table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table2 +POSTHOOK: Output: default@test_table2 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key desc) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key desc) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort orders does not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort orders does not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: - + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Dependency Collection + + 
Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: drop table test_table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table2 +PREHOOK: Output: default@test_table2 +POSTHOOK: query: drop table test_table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table2 +POSTHOOK: Output: default@test_table2 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key, value) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key, value) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort columns do not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort columns do not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + 
Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: drop table test_table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table2 +PREHOOK: Output: default@test_table2 +POSTHOOK: query: drop table test_table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table2 +POSTHOOK: Output: default@test_table2 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (value) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (value) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort columns do not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the sort columns do not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: drop table test_table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table2 +PREHOOK: Output: default@test_table2 +POSTHOOK: query: drop table test_table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table2 +POSTHOOK: Output: default@test_table2 +PREHOOK: query: 
CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the number of buckets do not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since the number of buckets do not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: drop table test_table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table2 +PREHOOK: Output: default@test_table2 +POSTHOOK: query: drop table test_table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table2 +POSTHOOK: Output: default@test_table2 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY (ds STRING) +CLUSTERED BY (key) INTO 2 BUCKETS 
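-- [Editor's note: illustrative sketch, not part of the recorded golden output.]
-- The table above is CLUSTERED BY (key) with no SORTED BY clause, so the EXPLAIN
-- that follows still plans a shuffle, but as a GROUP edge with an empty sort order:
-- rows must in effect be partitioned by hash(key) % 2 to enforce bucketing, while no
-- per-bucket ordering is required. A minimal, hedged reproduction of the scenario,
-- assuming a Hive session of this vintage (the SET statements are standard Hive
-- knobs of that era, not taken from this diff):
--   SET hive.enforce.bucketing=true;
--   SET hive.enforce.sorting=true;
--   INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
--   SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1';
-- With bucketing enforced but no target sort order, the resulting Spark plan pairs
-- Reducer 2 <- Map 1 (GROUP, 1) with "sort order:" left blank, as recorded below.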
+POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since sort columns do not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-reduce operation since sort columns do not match +EXPLAIN +INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') +SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Extract + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-3 + Stats-Aggr Operator + diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_22.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_22.q.out new file mode 100644 index 0000000..8c0de58 --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_22.q.out @@ -0,0 +1,292 @@ +PREHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table1 +POSTHOOK: query: -- Create two bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value 
STRING) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 +SELECT * FROM test_table1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 +SELECT * FROM test_table1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_table1 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE test_table2 +SELECT * FROM test_table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Output: default@test_table2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 +SELECT * FROM test_table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(*) from test_table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +#### A 
masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +#### A masked pattern was here #### +500 +PREHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +#### A masked pattern was here #### +253 +PREHOOK: query: select count(*) from test_table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +500 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +253 +PREHOOK: query: drop table test_table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table1 +PREHOOK: Output: default@test_table1 +POSTHOOK: query: drop table test_table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table1 +POSTHOOK: Output: default@test_table1 +PREHOOK: query: drop table test_table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table2 +PREHOOK: Output: default@test_table2 +POSTHOOK: query: drop table test_table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table2 +POSTHOOK: Output: default@test_table2 +PREHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) +CLUSTERED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table1 +POSTHOOK: query: CREATE TABLE test_table1 (key INT, value STRING) +CLUSTERED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) +CLUSTERED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) +CLUSTERED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 +SELECT * FROM test_table1 +PREHOOK: type: 
QUERY +POSTHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table +-- This should be a map-only operation +EXPLAIN INSERT OVERWRITE TABLE test_table2 +SELECT * FROM test_table1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_table1 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE test_table2 +SELECT * FROM test_table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Output: default@test_table2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 +SELECT * FROM test_table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table2.key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select count(*) from test_table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +#### A masked pattern was here #### +500 +PREHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table1 tablesample (bucket 2 out of 2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +#### A masked pattern was here #### +253 +PREHOOK: query: select count(*) from test_table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +500 +PREHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from test_table2 tablesample (bucket 2 out of 
2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +253 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out new file mode 100644 index 0000000..efa38d4 --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out @@ -0,0 +1,338 @@ +PREHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket_1 +POSTHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket_1 +PREHOOK: query: create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket_2 +POSTHOOK: query: create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket_2 +PREHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket_3 +POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket_3 +PREHOOK: query: explain +select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on 
(c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key = 5)) (type: boolean) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 5 (type: int) + sort order: + + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 52 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key = 5)) (type: boolean) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 5 (type: int) + sort order: + + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 55 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key = 5)) (type: boolean) + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 5 (type: int) + sort order: + + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key = 5)) (type: boolean) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 5 (type: int) + sort order: + + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 14 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 5 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 67 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 = 5) (type: boolean) + Statistics: Num rows: 8 Data size: 33 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), 5 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 33 Basic stats: COMPLETE Column stats: NONE + File Output 
Operator + compressed: false + Statistics: Num rows: 8 Data size: 33 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 15 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 5 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 15 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 15 Data size: 61 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- explain +-- select * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key left outer join src c on a.key=c.value + +-- select a.key from smb_bucket_1 a + +explain +select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 +PREHOOK: type: QUERY +POSTHOOK: query: -- explain +-- select * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key left outer join src c on a.key=c.value + +-- select a.key from smb_bucket_1 a + +explain +select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1), Reducer 6 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key = 5)) (type: boolean) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 5 (type: int) + sort order: + + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 52 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key = 5)) (type: boolean) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 5 (type: int) + sort order: + + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 55 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key = 5)) (type: boolean) + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: 5 (type: int) + sort order: + + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key = 5)) (type: boolean) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 5 (type: int) + sort order: + + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 14 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 5 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 67 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 = 5) (type: boolean) + Statistics: Num rows: 8 Data size: 33 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), 5 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 33 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 33 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 15 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 5 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 15 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 15 Data size: 61 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: 
Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out new file mode 100644 index 0000000..ebe22b4 --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out @@ -0,0 +1,716 @@ +PREHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket_1 +POSTHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket_1 +PREHOOK: query: create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket_2 +POSTHOOK: query: create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket_2 +PREHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket_3 +POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket_3 +PREHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_2 a join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_2 a join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was 
here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_2 a join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_2 a join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +20 val_20 20 val_20 +23 val_23 23 val_23 +PREHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_2 a left outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_2 a left outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a 
+ Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_2 a left outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_2 a left outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +20 val_20 20 val_20 +23 val_23 23 val_23 +25 val_25 NULL NULL +30 val_30 NULL NULL +PREHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_2 a right outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_2 a right outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + Select Operator 
+ expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_2 a right outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_2 a right outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +NULL NULL 4 val_4 +NULL NULL 10 val_10 +NULL NULL 17 val_17 +NULL NULL 19 val_19 +20 val_20 20 val_20 +23 val_23 23 val_23 +PREHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_2 a full outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a)*/ * from smb_bucket_2 a full outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + 
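Each smb_mapjoin_*.q.out golden file in this diff follows the same shape: the corresponding .q script creates small bucketed, sorted RCFILE tables, loads the smbbucket_*.rc fixtures, and then records an EXPLAIN plan followed by the actual row output for every join type and mapjoin hint combination. A minimal HiveQL sketch of one such scenario is below; the three "set" flags are assumptions borrowed from the wider smb_mapjoin test family and do not appear in this diff:

  -- assumed flags (not shown in this diff): enable bucketed, sort-merge map joins
  set hive.optimize.bucketmapjoin = true;
  set hive.optimize.bucketmapjoin.sortedmerge = true;
  set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;

  -- tables bucketed and sorted on the join key, one bucket each,
  -- matching the CREATE TABLE statements recorded in these .q.out files
  create table smb_bucket_2(key int, value string)
    clustered by (key) sorted by (key) into 1 buckets stored as rcfile;
  create table smb_bucket_3(key int, value string)
    clustered by (key) sorted by (key) into 1 buckets stored as rcfile;
  load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2;
  load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3;

  -- the golden file records the plan first, then the rows
  explain
  select /*+mapjoin(a)*/ * from smb_bucket_2 a right outer join smb_bucket_3 b on a.key = b.key;
  select /*+mapjoin(a)*/ * from smb_bucket_2 a right outer join smb_bucket_3 b on a.key = b.key;

Note that in the Spark plans captured here the hinted join still executes as a shuffle join (the Join Operator sits in Reducer 2, fed by GROUP PARTITION-LEVEL SORT edges) rather than as a map-side sort-merge join, which is why these variants all show the same Map/Reducer vertex layout.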
+PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_2 a full outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket_2 a full outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +NULL NULL 4 val_4 +NULL NULL 10 val_10 +NULL NULL 17 val_17 +NULL NULL 19 val_19 +20 val_20 20 val_20 +23 val_23 23 val_23 +25 val_25 NULL NULL +30 val_30 NULL NULL +PREHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_2 a join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_2 a join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_2 a join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_2 a join smb_bucket_3 b on a.key = 
b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +20 val_20 20 val_20 +23 val_23 23 val_23 +PREHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_2 a left outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_2 a left outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_2 a left outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_2 a left outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +20 val_20 20 val_20 +23 val_23 23 val_23 +25 val_25 NULL NULL +30 val_30 NULL NULL +PREHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_2 a right outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_2 a right outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP 
PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_2 a right outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_2 a right outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +NULL NULL 4 val_4 +NULL NULL 10 val_10 +NULL NULL 17 val_17 +NULL NULL 19 val_19 +20 val_20 20 val_20 +23 val_23 23 val_23 +PREHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_2 a full outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(b)*/ * from smb_bucket_2 a full outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 
key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_2 a full outer join smb_bucket_3 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket_2 a full outer join smb_bucket_3 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +NULL NULL 4 val_4 +NULL NULL 10 val_10 +NULL NULL 17 val_17 +NULL NULL 19 val_19 +20 val_20 20 val_20 +23 val_23 23 val_23 +25 val_25 NULL NULL +30 val_30 NULL NULL diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out new file mode 100644 index 0000000..c5ff5b8 --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out @@ -0,0 +1,1325 @@ +PREHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket_1 +POSTHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket_1 +PREHOOK: query: create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket_2 +POSTHOOK: query: create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket_2 +PREHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket_3 +POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket_3 +PREHOOK: query: load 
data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket_3 +PREHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 
2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +PREHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Inner Join 1 to 2 + 
condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +PREHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left 
Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +1 val_1 NULL NULL NULL NULL +3 val_3 NULL NULL NULL NULL +4 val_4 NULL NULL NULL NULL +5 val_5 NULL NULL NULL NULL +10 val_10 NULL NULL NULL NULL +PREHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: 
key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +NULL NULL NULL NULL 4 val_4 +NULL NULL NULL NULL 10 val_10 +NULL NULL NULL NULL 17 val_17 +NULL NULL NULL NULL 19 val_19 +NULL NULL NULL NULL 20 val_20 +NULL NULL NULL NULL 23 val_23 +PREHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value 
(type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Outer Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +1 val_1 NULL NULL NULL NULL +3 val_3 NULL NULL NULL NULL +4 val_4 NULL NULL NULL NULL +NULL NULL NULL NULL 4 val_4 +5 val_5 NULL NULL NULL NULL +10 val_10 NULL NULL NULL NULL +NULL NULL NULL NULL 10 val_10 +NULL NULL NULL NULL 17 val_17 +NULL NULL NULL NULL 19 val_19 +NULL NULL NULL NULL 20 val_20 +NULL NULL NULL NULL 23 val_23 +PREHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: 
string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +NULL NULL 20 val_20 20 val_20 +NULL NULL 23 val_23 23 val_23 +PREHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + 
Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +NULL NULL 20 val_20 20 val_20 +NULL NULL 23 val_23 23 val_23 +NULL NULL 25 val_25 NULL NULL +NULL NULL 30 val_30 NULL NULL +PREHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + 
Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +NULL NULL NULL NULL 4 val_4 +NULL NULL NULL NULL 10 val_10 +NULL NULL NULL NULL 17 val_17 +NULL NULL NULL NULL 19 val_19 +NULL NULL 20 val_20 20 val_20 +NULL NULL 23 val_23 23 val_23 +PREHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: 
Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Outer Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +NULL NULL NULL NULL 4 val_4 +NULL NULL NULL NULL 10 val_10 +NULL NULL NULL NULL 17 val_17 +NULL NULL NULL NULL 19 val_19 +NULL NULL 20 val_20 20 val_20 +NULL NULL 23 val_23 23 val_23 +NULL NULL 25 val_25 NULL NULL +NULL NULL 30 val_30 NULL NULL +PREHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY 
+POSTHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +NULL NULL 20 val_20 20 val_20 +NULL NULL 23 val_23 23 val_23 +PREHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b 
on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +1 val_1 NULL NULL NULL NULL +3 val_3 NULL 
NULL NULL NULL +4 val_4 NULL NULL NULL NULL +5 val_5 NULL NULL NULL NULL +10 val_10 NULL NULL NULL NULL +NULL NULL 20 val_20 20 val_20 +NULL NULL 23 val_23 23 val_23 +NULL NULL 25 val_25 NULL NULL +NULL NULL 30 val_30 NULL NULL +PREHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,b)*/ 
* from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +NULL NULL NULL NULL 4 val_4 +NULL NULL NULL NULL 10 val_10 +NULL NULL NULL NULL 17 val_17 +NULL NULL NULL NULL 19 val_19 +NULL NULL 20 val_20 20 val_20 +NULL NULL 23 val_23 23 val_23 +PREHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + Outer Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key full outer join 
smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,b)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +1 val_1 NULL NULL NULL NULL +3 val_3 NULL NULL NULL NULL +4 val_4 NULL NULL NULL NULL +NULL NULL NULL NULL 4 val_4 +5 val_5 NULL NULL NULL NULL +10 val_10 NULL NULL NULL NULL +NULL NULL NULL NULL 10 val_10 +NULL NULL NULL NULL 17 val_17 +NULL NULL NULL NULL 19 val_19 +NULL NULL 20 val_20 20 val_20 +NULL NULL 23 val_23 23 val_23 +NULL NULL 25 val_25 NULL NULL +NULL NULL 30 val_30 NULL NULL diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out new file mode 100644 index 0000000..e82038b --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out @@ -0,0 +1,1325 @@ +PREHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket_1 +POSTHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket_1 +PREHOOK: query: create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket_2 +POSTHOOK: query: create table smb_bucket_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket_2 +PREHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket_3 +POSTHOOK: query: create table smb_bucket_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket_3 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket_1 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table smb_bucket_1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket_1 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket_2 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket_2 +PREHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table 
smb_bucket_3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket_3 +POSTHOOK: query: load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket_3 +PREHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +PREHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +PREHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +1 val_1 NULL NULL NULL NULL +3 val_3 NULL NULL NULL NULL +4 val_4 NULL NULL NULL NULL +5 val_5 NULL NULL NULL NULL +10 val_10 NULL NULL NULL NULL +PREHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column 
stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +NULL NULL NULL NULL 4 val_4 +NULL NULL NULL NULL 10 val_10 +NULL NULL NULL NULL 17 val_17 +NULL NULL NULL NULL 19 val_19 +NULL NULL NULL NULL 20 val_20 +NULL NULL NULL NULL 23 val_23 +PREHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Outer Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 
(type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +1 val_1 NULL NULL NULL NULL +3 val_3 NULL NULL NULL NULL +4 val_4 NULL NULL NULL NULL +NULL NULL NULL NULL 4 val_4 +5 val_5 NULL NULL NULL NULL +10 val_10 NULL NULL NULL NULL +NULL NULL NULL NULL 10 val_10 +NULL NULL NULL NULL 17 val_17 +NULL NULL NULL NULL 19 val_19 +NULL NULL NULL NULL 20 val_20 +NULL NULL NULL NULL 23 val_23 +PREHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: 
+ Right Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +NULL NULL 20 val_20 20 val_20 +NULL NULL 23 val_23 23 val_23 +PREHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + 
value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +NULL NULL 20 val_20 20 val_20 +NULL NULL 23 val_23 23 val_23 +NULL NULL 25 val_25 NULL NULL +NULL NULL 30 val_30 NULL NULL +PREHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +NULL NULL NULL NULL 4 val_4 +NULL NULL NULL NULL 10 val_10 +NULL NULL NULL NULL 17 val_17 +NULL NULL NULL NULL 19 val_19 +NULL NULL 20 val_20 20 val_20 +NULL NULL 23 val_23 23 val_23 +PREHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + 
Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Outer Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +NULL NULL NULL NULL 4 val_4 +NULL NULL NULL NULL 10 val_10 +NULL NULL NULL NULL 17 val_17 +NULL NULL NULL NULL 19 val_19 +NULL NULL 20 val_20 20 val_20 +NULL NULL 23 val_23 23 val_23 +NULL NULL 25 val_25 NULL NULL +NULL NULL 30 val_30 NULL NULL +PREHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value 
expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +NULL NULL 20 val_20 20 val_20 +NULL NULL 23 val_23 23 val_23 +PREHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key 
(type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +1 val_1 NULL NULL NULL NULL +3 val_3 NULL NULL NULL NULL +4 val_4 NULL NULL NULL NULL +5 val_5 NULL NULL NULL NULL +10 val_10 NULL NULL NULL NULL +NULL NULL 20 val_20 20 val_20 +NULL NULL 23 val_23 23 val_23 +NULL NULL 25 val_25 NULL NULL +NULL NULL 30 val_30 NULL NULL +PREHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), 
Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +NULL NULL NULL NULL 4 val_4 +NULL NULL NULL NULL 10 val_10 +NULL NULL NULL NULL 17 val_17 +NULL NULL NULL NULL 19 val_19 +NULL NULL 20 val_20 20 val_20 +NULL NULL 23 val_23 23 val_23 +PREHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key 
+POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + Outer Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_1 +PREHOOK: Input: default@smb_bucket_2 +PREHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a,c)*/ * from smb_bucket_1 a full outer join smb_bucket_2 b on a.key = b.key full outer join smb_bucket_3 c on b.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_1 +POSTHOOK: Input: default@smb_bucket_2 +POSTHOOK: Input: default@smb_bucket_3 +#### A masked pattern was here #### +1 val_1 NULL NULL NULL NULL +3 val_3 NULL NULL NULL NULL +4 val_4 NULL NULL NULL NULL +NULL NULL NULL NULL 4 val_4 +5 val_5 NULL NULL NULL NULL +10 val_10 NULL NULL NULL NULL +NULL NULL NULL NULL 10 val_10 +NULL NULL NULL NULL 17 val_17 +NULL NULL NULL NULL 19 val_19 +NULL NULL 20 val_20 20 val_20 
+NULL NULL 23 val_23 23 val_23 +NULL NULL 25 val_25 NULL NULL +NULL NULL 30 val_30 NULL NULL diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out new file mode 100644 index 0000000..1836f12 --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out @@ -0,0 +1,2732 @@ +PREHOOK: query: CREATE TABLE smb_bucket4_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket4_1 +POSTHOOK: query: CREATE TABLE smb_bucket4_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket4_1 +PREHOOK: query: CREATE TABLE smb_bucket4_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS RCFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket4_2 +POSTHOOK: query: CREATE TABLE smb_bucket4_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket4_2 +PREHOOK: query: create table smb_join_results(k1 int, v1 string, k2 int, v2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_join_results +POSTHOOK: query: create table smb_join_results(k1 int, v1 string, k2 int, v2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_join_results +PREHOOK: query: create table normal_join_results(k1 int, v1 string, k2 int, v2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@normal_join_results +POSTHOOK: query: create table normal_join_results(k1 int, v1 string, k2 int, v2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@normal_join_results +PREHOOK: query: insert overwrite table smb_bucket4_1 +select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@smb_bucket4_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_1 +select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@smb_bucket4_1 +POSTHOOK: Lineage: smb_bucket4_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smb_bucket4_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table smb_bucket4_2 +select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@smb_bucket4_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_2 +select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@smb_bucket4_2 +POSTHOOK: Lineage: smb_bucket4_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smb_bucket4_2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] +PREHOOK: query: explain +insert overwrite table smb_join_results +select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table smb_join_results +select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 48 Data size: 5008 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 24 Data size: 2504 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 24 Data size: 2504 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 48 Data size: 5008 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 24 Data size: 2504 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 24 Data size: 2504 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 26 Data size: 2754 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 2754 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 2754 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table smb_join_results +select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +PREHOOK: Output: default@smb_join_results +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to 
+POSTHOOK: query: insert overwrite table smb_join_results +select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +POSTHOOK: Output: default@smb_join_results +POSTHOOK: Lineage: smb_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from smb_join_results order by k1 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_join_results +#### A masked pattern was here #### +POSTHOOK: query: select * from smb_join_results order by k1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_join_results +#### A masked pattern was here #### +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +2 val_2 2 val_2 +4 val_4 4 val_4 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +8 val_8 8 val_8 +9 val_9 9 val_9 +10 val_10 10 val_10 +11 val_11 11 val_11 +12 val_12 12 val_12 +12 val_12 12 val_12 +12 val_12 12 val_12 +12 val_12 12 val_12 +15 val_15 15 val_15 +15 val_15 15 val_15 +15 val_15 15 val_15 +15 val_15 15 val_15 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +20 val_20 20 val_20 +24 val_24 24 val_24 +24 val_24 24 val_24 +24 val_24 24 val_24 +24 val_24 24 val_24 +26 val_26 26 val_26 +26 val_26 26 val_26 +26 val_26 26 val_26 +26 val_26 26 val_26 +27 val_27 27 val_27 +28 val_28 28 val_28 +30 val_30 30 val_30 +33 val_33 33 val_33 +34 val_34 34 val_34 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +37 val_37 37 val_37 +37 val_37 37 val_37 +37 val_37 37 val_37 +37 val_37 37 val_37 +41 val_41 41 val_41 +42 val_42 42 val_42 +42 val_42 42 val_42 +42 val_42 42 val_42 +42 val_42 42 val_42 +43 val_43 43 val_43 +44 val_44 44 val_44 +47 val_47 47 val_47 +51 val_51 51 val_51 +51 val_51 51 val_51 +51 val_51 51 val_51 +51 val_51 51 val_51 +53 val_53 53 val_53 +54 val_54 54 val_54 +57 val_57 57 val_57 +58 val_58 58 val_58 +58 val_58 58 val_58 +58 val_58 58 val_58 +58 val_58 58 val_58 +64 val_64 64 val_64 +65 val_65 65 val_65 +66 val_66 66 val_66 +67 val_67 67 val_67 +67 val_67 67 val_67 +67 val_67 67 val_67 +67 val_67 67 val_67 +69 val_69 69 val_69 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +72 val_72 72 val_72 +72 val_72 72 val_72 +72 val_72 72 val_72 +72 val_72 72 val_72 +74 val_74 74 val_74 +76 val_76 76 val_76 +76 val_76 76 val_76 +76 val_76 76 val_76 +76 val_76 76 val_76 +77 val_77 77 val_77 +78 val_78 78 val_78 +80 val_80 80 val_80 +82 val_82 82 val_82 +83 val_83 83 val_83 +83 val_83 83 val_83 +83 val_83 83 val_83 +83 val_83 83 val_83 +84 val_84 84 val_84 +84 val_84 84 val_84 +84 val_84 84 val_84 +84 val_84 84 val_84 
+85 val_85 85 val_85 +86 val_86 86 val_86 +87 val_87 87 val_87 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +92 val_92 92 val_92 +95 val_95 95 val_95 +95 val_95 95 val_95 +95 val_95 95 val_95 +95 val_95 95 val_95 +96 val_96 96 val_96 +97 val_97 97 val_97 +97 val_97 97 val_97 +97 val_97 97 val_97 +97 val_97 97 val_97 +98 val_98 98 val_98 +98 val_98 98 val_98 +98 val_98 98 val_98 +98 val_98 98 val_98 +100 val_100 100 val_100 +100 val_100 100 val_100 +100 val_100 100 val_100 +100 val_100 100 val_100 +103 val_103 103 val_103 +103 val_103 103 val_103 +103 val_103 103 val_103 +103 val_103 103 val_103 +104 val_104 104 val_104 +104 val_104 104 val_104 +104 val_104 104 val_104 +104 val_104 104 val_104 +105 val_105 105 val_105 +111 val_111 111 val_111 +113 val_113 113 val_113 +113 val_113 113 val_113 +113 val_113 113 val_113 +113 val_113 113 val_113 +114 val_114 114 val_114 +116 val_116 116 val_116 +118 val_118 118 val_118 +118 val_118 118 val_118 +118 val_118 118 val_118 +118 val_118 118 val_118 +119 val_119 119 val_119 +119 val_119 119 val_119 +119 val_119 119 val_119 +119 val_119 119 val_119 +119 val_119 119 val_119 +119 val_119 119 val_119 +119 val_119 119 val_119 +119 val_119 119 val_119 +119 val_119 119 val_119 +120 val_120 120 val_120 +120 val_120 120 val_120 +120 val_120 120 val_120 +120 val_120 120 val_120 +125 val_125 125 val_125 +125 val_125 125 val_125 +125 val_125 125 val_125 +125 val_125 125 val_125 +126 val_126 126 val_126 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +129 val_129 129 val_129 +129 val_129 129 val_129 +129 val_129 129 val_129 +129 val_129 129 val_129 +131 val_131 131 val_131 +133 val_133 133 val_133 +134 val_134 134 val_134 +134 val_134 134 val_134 +134 val_134 134 val_134 +134 val_134 134 val_134 +136 val_136 136 val_136 +137 val_137 137 val_137 +137 val_137 137 val_137 +137 val_137 137 val_137 +137 val_137 137 val_137 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +143 val_143 143 val_143 +145 val_145 145 val_145 +146 val_146 146 val_146 +146 val_146 146 val_146 +146 val_146 146 val_146 +146 val_146 146 val_146 +149 val_149 149 val_149 +149 val_149 149 val_149 +149 val_149 149 val_149 +149 val_149 149 val_149 +150 val_150 150 val_150 +152 val_152 152 val_152 +152 val_152 152 val_152 +152 val_152 152 val_152 +152 val_152 152 val_152 +153 val_153 153 val_153 +155 val_155 155 val_155 +156 val_156 156 val_156 +157 val_157 157 val_157 +158 val_158 158 val_158 +160 val_160 160 val_160 +162 val_162 162 val_162 +163 val_163 163 val_163 +164 val_164 164 val_164 +164 val_164 164 val_164 +164 val_164 164 val_164 +164 val_164 164 val_164 +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 165 val_165 +166 val_166 166 val_166 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 
val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +168 val_168 168 val_168 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +170 val_170 170 val_170 +172 val_172 172 val_172 +172 val_172 172 val_172 +172 val_172 172 val_172 +172 val_172 172 val_172 +174 val_174 174 val_174 +174 val_174 174 val_174 +174 val_174 174 val_174 +174 val_174 174 val_174 +175 val_175 175 val_175 +175 val_175 175 val_175 +175 val_175 175 val_175 +175 val_175 175 val_175 +176 val_176 176 val_176 +176 val_176 176 val_176 +176 val_176 176 val_176 +176 val_176 176 val_176 +177 val_177 177 val_177 +178 val_178 178 val_178 +179 val_179 179 val_179 +179 val_179 179 val_179 +179 val_179 179 val_179 +179 val_179 179 val_179 +180 val_180 180 val_180 +181 val_181 181 val_181 +183 val_183 183 val_183 +186 val_186 186 val_186 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +189 val_189 189 val_189 +190 val_190 190 val_190 +191 val_191 191 val_191 +191 val_191 191 val_191 +191 val_191 191 val_191 +191 val_191 191 val_191 +192 val_192 192 val_192 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +194 val_194 194 val_194 +195 val_195 195 val_195 +195 val_195 195 val_195 +195 val_195 195 val_195 +195 val_195 195 val_195 +196 val_196 196 val_196 +197 val_197 197 val_197 +197 val_197 197 val_197 +197 val_197 197 val_197 +197 val_197 197 val_197 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +200 val_200 200 val_200 +200 val_200 200 val_200 +200 val_200 200 val_200 +200 val_200 200 val_200 +201 val_201 201 val_201 +202 val_202 202 val_202 +203 val_203 203 val_203 +203 val_203 203 val_203 +203 val_203 203 val_203 +203 val_203 203 val_203 +205 val_205 205 val_205 +205 val_205 205 val_205 +205 val_205 205 val_205 +205 val_205 205 val_205 +207 val_207 207 val_207 +207 val_207 207 val_207 +207 val_207 207 val_207 +207 val_207 207 val_207 +208 val_208 208 val_208 +208 val_208 208 val_208 +208 val_208 208 val_208 +208 val_208 208 val_208 +208 val_208 208 val_208 +208 val_208 208 val_208 +208 val_208 208 val_208 +208 val_208 208 val_208 +208 val_208 208 val_208 +209 val_209 209 val_209 +209 val_209 209 val_209 +209 val_209 209 val_209 +209 val_209 209 val_209 +213 val_213 213 val_213 +213 val_213 213 val_213 +213 val_213 213 val_213 +213 val_213 213 val_213 +214 val_214 214 val_214 +216 val_216 216 val_216 +216 val_216 216 val_216 +216 val_216 216 val_216 +216 val_216 216 val_216 +217 val_217 217 val_217 +217 val_217 217 val_217 +217 val_217 217 val_217 +217 val_217 217 val_217 +218 val_218 218 val_218 +219 val_219 219 val_219 +219 val_219 219 val_219 +219 val_219 219 val_219 +219 val_219 219 val_219 +221 val_221 221 val_221 +221 val_221 221 val_221 +221 
val_221 221 val_221 +221 val_221 221 val_221 +222 val_222 222 val_222 +223 val_223 223 val_223 +223 val_223 223 val_223 +223 val_223 223 val_223 +223 val_223 223 val_223 +224 val_224 224 val_224 +224 val_224 224 val_224 +224 val_224 224 val_224 +224 val_224 224 val_224 +226 val_226 226 val_226 +228 val_228 228 val_228 +229 val_229 229 val_229 +229 val_229 229 val_229 +229 val_229 229 val_229 +229 val_229 229 val_229 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +233 val_233 233 val_233 +233 val_233 233 val_233 +233 val_233 233 val_233 +233 val_233 233 val_233 +235 val_235 235 val_235 +237 val_237 237 val_237 +237 val_237 237 val_237 +237 val_237 237 val_237 +237 val_237 237 val_237 +238 val_238 238 val_238 +238 val_238 238 val_238 +238 val_238 238 val_238 +238 val_238 238 val_238 +239 val_239 239 val_239 +239 val_239 239 val_239 +239 val_239 239 val_239 +239 val_239 239 val_239 +241 val_241 241 val_241 +242 val_242 242 val_242 +242 val_242 242 val_242 +242 val_242 242 val_242 +242 val_242 242 val_242 +244 val_244 244 val_244 +247 val_247 247 val_247 +248 val_248 248 val_248 +249 val_249 249 val_249 +252 val_252 252 val_252 +255 val_255 255 val_255 +255 val_255 255 val_255 +255 val_255 255 val_255 +255 val_255 255 val_255 +256 val_256 256 val_256 +256 val_256 256 val_256 +256 val_256 256 val_256 +256 val_256 256 val_256 +257 val_257 257 val_257 +258 val_258 258 val_258 +260 val_260 260 val_260 +262 val_262 262 val_262 +263 val_263 263 val_263 +265 val_265 265 val_265 +265 val_265 265 val_265 +265 val_265 265 val_265 +265 val_265 265 val_265 +266 val_266 266 val_266 +272 val_272 272 val_272 +272 val_272 272 val_272 +272 val_272 272 val_272 +272 val_272 272 val_272 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +274 val_274 274 val_274 +275 val_275 275 val_275 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +278 val_278 278 val_278 +278 val_278 278 val_278 +278 val_278 278 val_278 +278 val_278 278 val_278 +280 val_280 280 val_280 +280 val_280 280 val_280 +280 val_280 280 val_280 +280 val_280 280 val_280 +281 val_281 281 val_281 +281 val_281 281 val_281 +281 val_281 281 val_281 +281 val_281 281 val_281 +282 val_282 282 val_282 +282 val_282 282 val_282 +282 val_282 282 val_282 +282 val_282 282 val_282 +283 val_283 283 val_283 +284 val_284 284 val_284 +285 val_285 285 val_285 +286 val_286 286 val_286 +287 val_287 287 val_287 +288 val_288 288 val_288 +288 val_288 288 val_288 +288 val_288 288 val_288 +288 
val_288 288 val_288 +289 val_289 289 val_289 +291 val_291 291 val_291 +292 val_292 292 val_292 +296 val_296 296 val_296 +298 val_298 298 val_298 +298 val_298 298 val_298 +298 val_298 298 val_298 +298 val_298 298 val_298 +298 val_298 298 val_298 +298 val_298 298 val_298 +298 val_298 298 val_298 +298 val_298 298 val_298 +298 val_298 298 val_298 +302 val_302 302 val_302 +305 val_305 305 val_305 +306 val_306 306 val_306 +307 val_307 307 val_307 +307 val_307 307 val_307 +307 val_307 307 val_307 +307 val_307 307 val_307 +308 val_308 308 val_308 +309 val_309 309 val_309 +309 val_309 309 val_309 +309 val_309 309 val_309 +309 val_309 309 val_309 +310 val_310 310 val_310 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +315 val_315 315 val_315 +316 val_316 316 val_316 +316 val_316 316 val_316 +316 val_316 316 val_316 +316 val_316 316 val_316 +316 val_316 316 val_316 +316 val_316 316 val_316 +316 val_316 316 val_316 +316 val_316 316 val_316 +316 val_316 316 val_316 +317 val_317 317 val_317 +317 val_317 317 val_317 +317 val_317 317 val_317 +317 val_317 317 val_317 +318 val_318 318 val_318 +318 val_318 318 val_318 +318 val_318 318 val_318 +318 val_318 318 val_318 +318 val_318 318 val_318 +318 val_318 318 val_318 +318 val_318 318 val_318 +318 val_318 318 val_318 +318 val_318 318 val_318 +321 val_321 321 val_321 +321 val_321 321 val_321 +321 val_321 321 val_321 +321 val_321 321 val_321 +322 val_322 322 val_322 +322 val_322 322 val_322 +322 val_322 322 val_322 +322 val_322 322 val_322 +323 val_323 323 val_323 +325 val_325 325 val_325 +325 val_325 325 val_325 +325 val_325 325 val_325 +325 val_325 325 val_325 +327 val_327 327 val_327 +327 val_327 327 val_327 +327 val_327 327 val_327 +327 val_327 327 val_327 +327 val_327 327 val_327 +327 val_327 327 val_327 +327 val_327 327 val_327 +327 val_327 327 val_327 +327 val_327 327 val_327 +331 val_331 331 val_331 +331 val_331 331 val_331 +331 val_331 331 val_331 +331 val_331 331 val_331 +332 val_332 332 val_332 +333 val_333 333 val_333 +333 val_333 333 val_333 +333 val_333 333 val_333 +333 val_333 333 val_333 +335 val_335 335 val_335 +336 val_336 336 val_336 +338 val_338 338 val_338 +339 val_339 339 val_339 +341 val_341 341 val_341 +342 val_342 342 val_342 +342 val_342 342 val_342 +342 val_342 342 val_342 +342 val_342 342 val_342 +344 val_344 344 val_344 +344 val_344 344 val_344 +344 val_344 344 val_344 +344 val_344 344 val_344 +345 val_345 345 val_345 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +351 val_351 351 val_351 +353 val_353 353 val_353 +353 val_353 353 val_353 +353 val_353 353 val_353 +353 val_353 353 val_353 +356 val_356 356 val_356 +360 val_360 360 val_360 +362 val_362 362 val_362 +364 val_364 364 val_364 +365 val_365 365 val_365 +366 val_366 366 val_366 +367 val_367 367 val_367 +367 val_367 367 val_367 +367 
val_367 367 val_367 +367 val_367 367 val_367 +368 val_368 368 val_368 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +373 val_373 373 val_373 +374 val_374 374 val_374 +375 val_375 375 val_375 +377 val_377 377 val_377 +378 val_378 378 val_378 +379 val_379 379 val_379 +382 val_382 382 val_382 +382 val_382 382 val_382 +382 val_382 382 val_382 +382 val_382 382 val_382 +384 val_384 384 val_384 +384 val_384 384 val_384 +384 val_384 384 val_384 +384 val_384 384 val_384 +384 val_384 384 val_384 +384 val_384 384 val_384 +384 val_384 384 val_384 +384 val_384 384 val_384 +384 val_384 384 val_384 +386 val_386 386 val_386 +389 val_389 389 val_389 +392 val_392 392 val_392 +393 val_393 393 val_393 +394 val_394 394 val_394 +395 val_395 395 val_395 +395 val_395 395 val_395 +395 val_395 395 val_395 +395 val_395 395 val_395 +396 val_396 396 val_396 +396 val_396 396 val_396 +396 val_396 396 val_396 +396 val_396 396 val_396 +396 val_396 396 val_396 +396 val_396 396 val_396 +396 val_396 396 val_396 +396 val_396 396 val_396 +396 val_396 396 val_396 +397 val_397 397 val_397 +397 val_397 397 val_397 +397 val_397 397 val_397 +397 val_397 397 val_397 +399 val_399 399 val_399 +399 val_399 399 val_399 +399 val_399 399 val_399 +399 val_399 399 val_399 +400 val_400 400 val_400 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +402 val_402 402 val_402 +403 val_403 403 val_403 +403 val_403 403 val_403 +403 val_403 403 val_403 +403 val_403 403 val_403 +403 val_403 403 val_403 +403 val_403 403 val_403 +403 val_403 403 val_403 +403 val_403 403 val_403 +403 val_403 403 val_403 +404 val_404 404 val_404 +404 val_404 404 val_404 +404 val_404 404 val_404 +404 val_404 404 val_404 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +407 val_407 407 val_407 +409 val_409 409 val_409 +409 val_409 409 val_409 +409 val_409 409 val_409 +409 val_409 409 val_409 +409 val_409 409 val_409 +409 val_409 409 val_409 +409 val_409 409 val_409 +409 val_409 409 val_409 +409 val_409 409 val_409 +411 val_411 411 val_411 +413 val_413 413 val_413 +413 val_413 413 val_413 +413 val_413 413 val_413 +413 val_413 413 val_413 +414 val_414 414 val_414 +414 val_414 414 val_414 +414 val_414 414 val_414 +414 val_414 414 val_414 +417 val_417 417 val_417 +417 val_417 417 val_417 +417 val_417 417 val_417 +417 val_417 417 val_417 +417 val_417 417 val_417 +417 val_417 417 val_417 +417 val_417 417 val_417 +417 val_417 417 val_417 +417 val_417 417 val_417 +418 val_418 418 val_418 +419 
val_419 419 val_419 +421 val_421 421 val_421 +424 val_424 424 val_424 +424 val_424 424 val_424 +424 val_424 424 val_424 +424 val_424 424 val_424 +427 val_427 427 val_427 +429 val_429 429 val_429 +429 val_429 429 val_429 +429 val_429 429 val_429 +429 val_429 429 val_429 +430 val_430 430 val_430 +430 val_430 430 val_430 +430 val_430 430 val_430 +430 val_430 430 val_430 +430 val_430 430 val_430 +430 val_430 430 val_430 +430 val_430 430 val_430 +430 val_430 430 val_430 +430 val_430 430 val_430 +431 val_431 431 val_431 +431 val_431 431 val_431 +431 val_431 431 val_431 +431 val_431 431 val_431 +431 val_431 431 val_431 +431 val_431 431 val_431 +431 val_431 431 val_431 +431 val_431 431 val_431 +431 val_431 431 val_431 +432 val_432 432 val_432 +435 val_435 435 val_435 +436 val_436 436 val_436 +437 val_437 437 val_437 +438 val_438 438 val_438 +438 val_438 438 val_438 +438 val_438 438 val_438 +438 val_438 438 val_438 +438 val_438 438 val_438 +438 val_438 438 val_438 +438 val_438 438 val_438 +438 val_438 438 val_438 +438 val_438 438 val_438 +439 val_439 439 val_439 +439 val_439 439 val_439 +439 val_439 439 val_439 +439 val_439 439 val_439 +443 val_443 443 val_443 +444 val_444 444 val_444 +446 val_446 446 val_446 +448 val_448 448 val_448 +449 val_449 449 val_449 +452 val_452 452 val_452 +453 val_453 453 val_453 +454 val_454 454 val_454 +454 val_454 454 val_454 +454 val_454 454 val_454 +454 val_454 454 val_454 +454 val_454 454 val_454 +454 val_454 454 val_454 +454 val_454 454 val_454 +454 val_454 454 val_454 +454 val_454 454 val_454 +455 val_455 455 val_455 +457 val_457 457 val_457 +458 val_458 458 val_458 +458 val_458 458 val_458 +458 val_458 458 val_458 +458 val_458 458 val_458 +459 val_459 459 val_459 +459 val_459 459 val_459 +459 val_459 459 val_459 +459 val_459 459 val_459 +460 val_460 460 val_460 +462 val_462 462 val_462 +462 val_462 462 val_462 +462 val_462 462 val_462 +462 val_462 462 val_462 +463 val_463 463 val_463 +463 val_463 463 val_463 +463 val_463 463 val_463 +463 val_463 463 val_463 +466 val_466 466 val_466 +466 val_466 466 val_466 +466 val_466 466 val_466 +466 val_466 466 val_466 +466 val_466 466 val_466 +466 val_466 466 val_466 +466 val_466 466 val_466 +466 val_466 466 val_466 +466 val_466 466 val_466 +467 val_467 467 val_467 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +470 val_470 470 val_470 +472 val_472 472 val_472 +475 val_475 475 val_475 +477 val_477 477 val_477 +478 val_478 478 val_478 +478 val_478 478 val_478 +478 val_478 478 val_478 +478 val_478 478 val_478 +479 val_479 479 val_479 +480 val_480 480 val_480 +480 
val_480 480 val_480 +480 val_480 480 val_480 +480 val_480 480 val_480 +480 val_480 480 val_480 +480 val_480 480 val_480 +480 val_480 480 val_480 +480 val_480 480 val_480 +480 val_480 480 val_480 +481 val_481 481 val_481 +482 val_482 482 val_482 +483 val_483 483 val_483 +484 val_484 484 val_484 +485 val_485 485 val_485 +487 val_487 487 val_487 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +490 val_490 490 val_490 +491 val_491 491 val_491 +492 val_492 492 val_492 +492 val_492 492 val_492 +492 val_492 492 val_492 +492 val_492 492 val_492 +493 val_493 493 val_493 +494 val_494 494 val_494 +495 val_495 495 val_495 +496 val_496 496 val_496 +497 val_497 497 val_497 +498 val_498 498 val_498 +498 val_498 498 val_498 +498 val_498 498 val_498 +498 val_498 498 val_498 +498 val_498 498 val_498 +498 val_498 498 val_498 +498 val_498 498 val_498 +498 val_498 498 val_498 +498 val_498 498 val_498 +PREHOOK: query: insert overwrite table normal_join_results select * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +PREHOOK: Output: default@normal_join_results +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table normal_join_results select * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +POSTHOOK: Output: default@normal_join_results +POSTHOOK: Lineage: normal_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from normal_join_results +PREHOOK: type: QUERY +PREHOOK: Input: default@normal_join_results +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from normal_join_results +POSTHOOK: type: QUERY +POSTHOOK: Input: default@normal_join_results +#### A masked pattern was here #### +278697 278697 101852390308 101852390308 +PREHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from smb_join_results +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_join_results +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from smb_join_results +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_join_results +#### A masked pattern was here #### +278697 278697 101852390308 101852390308 +PREHOOK: query: explain +insert overwrite table smb_join_results +select 
/*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table smb_join_results +select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 48 Data size: 5008 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 24 Data size: 2504 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 24 Data size: 2504 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 48 Data size: 5008 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 24 Data size: 2504 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 24 Data size: 2504 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 26 Data size: 2754 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 2754 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 2754 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table smb_join_results +select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +PREHOOK: Output: default@smb_join_results +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_join_results +select /*+mapjoin(b)*/ * from 
smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +POSTHOOK: Output: default@smb_join_results +POSTHOOK: Lineage: smb_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: insert overwrite table smb_join_results +select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +PREHOOK: Output: default@smb_join_results +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_join_results +select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +POSTHOOK: Output: default@smb_join_results +POSTHOOK: Lineage: smb_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from smb_join_results order by k1 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_join_results +#### A masked pattern was here #### +POSTHOOK: query: select * from smb_join_results order by k1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_join_results +#### A masked pattern was here #### +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +2 val_2 2 val_2 +4 val_4 4 val_4 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +5 val_5 5 val_5 +8 val_8 8 val_8 +9 val_9 9 val_9 +10 val_10 10 val_10 +11 val_11 11 val_11 +12 val_12 12 val_12 +12 val_12 12 val_12 +12 val_12 12 val_12 +12 val_12 12 val_12 +15 val_15 15 val_15 +15 val_15 15 val_15 +15 val_15 15 val_15 +15 val_15 15 val_15 +17 val_17 17 val_17 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +18 val_18 18 val_18 +19 val_19 19 val_19 +20 val_20 20 val_20 +24 val_24 24 val_24 +24 val_24 24 val_24 +24 val_24 24 val_24 +24 val_24 24 val_24 +26 val_26 26 val_26 +26 val_26 26 val_26 +26 val_26 26 val_26 +26 val_26 26 val_26 +27 val_27 27 val_27 +28 val_28 28 val_28 +30 val_30 30 val_30 +33 val_33 33 val_33 +34 val_34 34 val_34 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +35 val_35 35 val_35 +37 val_37 37 val_37 +37 val_37 37 val_37 +37 val_37 37 val_37 +37 val_37 37 val_37 +41 val_41 41 val_41 +42 val_42 42 val_42 +42 val_42 42 val_42 +42 
val_42 42 val_42 +42 val_42 42 val_42 +43 val_43 43 val_43 +44 val_44 44 val_44 +47 val_47 47 val_47 +51 val_51 51 val_51 +51 val_51 51 val_51 +51 val_51 51 val_51 +51 val_51 51 val_51 +53 val_53 53 val_53 +54 val_54 54 val_54 +57 val_57 57 val_57 +58 val_58 58 val_58 +58 val_58 58 val_58 +58 val_58 58 val_58 +58 val_58 58 val_58 +64 val_64 64 val_64 +65 val_65 65 val_65 +66 val_66 66 val_66 +67 val_67 67 val_67 +67 val_67 67 val_67 +67 val_67 67 val_67 +67 val_67 67 val_67 +69 val_69 69 val_69 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +70 val_70 70 val_70 +72 val_72 72 val_72 +72 val_72 72 val_72 +72 val_72 72 val_72 +72 val_72 72 val_72 +74 val_74 74 val_74 +76 val_76 76 val_76 +76 val_76 76 val_76 +76 val_76 76 val_76 +76 val_76 76 val_76 +77 val_77 77 val_77 +78 val_78 78 val_78 +80 val_80 80 val_80 +82 val_82 82 val_82 +83 val_83 83 val_83 +83 val_83 83 val_83 +83 val_83 83 val_83 +83 val_83 83 val_83 +84 val_84 84 val_84 +84 val_84 84 val_84 +84 val_84 84 val_84 +84 val_84 84 val_84 +85 val_85 85 val_85 +86 val_86 86 val_86 +87 val_87 87 val_87 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +90 val_90 90 val_90 +92 val_92 92 val_92 +95 val_95 95 val_95 +95 val_95 95 val_95 +95 val_95 95 val_95 +95 val_95 95 val_95 +96 val_96 96 val_96 +97 val_97 97 val_97 +97 val_97 97 val_97 +97 val_97 97 val_97 +97 val_97 97 val_97 +98 val_98 98 val_98 +98 val_98 98 val_98 +98 val_98 98 val_98 +98 val_98 98 val_98 +100 val_100 100 val_100 +100 val_100 100 val_100 +100 val_100 100 val_100 +100 val_100 100 val_100 +103 val_103 103 val_103 +103 val_103 103 val_103 +103 val_103 103 val_103 +103 val_103 103 val_103 +104 val_104 104 val_104 +104 val_104 104 val_104 +104 val_104 104 val_104 +104 val_104 104 val_104 +105 val_105 105 val_105 +111 val_111 111 val_111 +113 val_113 113 val_113 +113 val_113 113 val_113 +113 val_113 113 val_113 +113 val_113 113 val_113 +114 val_114 114 val_114 +116 val_116 116 val_116 +118 val_118 118 val_118 +118 val_118 118 val_118 +118 val_118 118 val_118 +118 val_118 118 val_118 +119 val_119 119 val_119 +119 val_119 119 val_119 +119 val_119 119 val_119 +119 val_119 119 val_119 +119 val_119 119 val_119 +119 val_119 119 val_119 +119 val_119 119 val_119 +119 val_119 119 val_119 +119 val_119 119 val_119 +120 val_120 120 val_120 +120 val_120 120 val_120 +120 val_120 120 val_120 +120 val_120 120 val_120 +125 val_125 125 val_125 +125 val_125 125 val_125 +125 val_125 125 val_125 +125 val_125 125 val_125 +126 val_126 126 val_126 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +129 val_129 129 val_129 +129 val_129 129 val_129 +129 val_129 129 val_129 +129 val_129 129 val_129 +131 val_131 131 val_131 +133 val_133 133 val_133 +134 val_134 134 val_134 +134 val_134 134 val_134 +134 val_134 134 val_134 +134 val_134 134 val_134 +136 val_136 136 val_136 +137 val_137 137 val_137 +137 val_137 137 val_137 +137 val_137 137 val_137 +137 val_137 137 val_137 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 
+138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +138 val_138 138 val_138 +143 val_143 143 val_143 +145 val_145 145 val_145 +146 val_146 146 val_146 +146 val_146 146 val_146 +146 val_146 146 val_146 +146 val_146 146 val_146 +149 val_149 149 val_149 +149 val_149 149 val_149 +149 val_149 149 val_149 +149 val_149 149 val_149 +150 val_150 150 val_150 +152 val_152 152 val_152 +152 val_152 152 val_152 +152 val_152 152 val_152 +152 val_152 152 val_152 +153 val_153 153 val_153 +155 val_155 155 val_155 +156 val_156 156 val_156 +157 val_157 157 val_157 +158 val_158 158 val_158 +160 val_160 160 val_160 +162 val_162 162 val_162 +163 val_163 163 val_163 +164 val_164 164 val_164 +164 val_164 164 val_164 +164 val_164 164 val_164 +164 val_164 164 val_164 +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 165 val_165 +166 val_166 166 val_166 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +167 val_167 167 val_167 +168 val_168 168 val_168 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +169 val_169 169 val_169 +170 val_170 170 val_170 +172 val_172 172 val_172 +172 val_172 172 val_172 +172 val_172 172 val_172 +172 val_172 172 val_172 +174 val_174 174 val_174 +174 val_174 174 val_174 +174 val_174 174 val_174 +174 val_174 174 val_174 +175 val_175 175 val_175 +175 val_175 175 val_175 +175 val_175 175 val_175 +175 val_175 175 val_175 +176 val_176 176 val_176 +176 val_176 176 val_176 +176 val_176 176 val_176 +176 val_176 176 val_176 +177 val_177 177 val_177 +178 val_178 178 val_178 +179 val_179 179 val_179 +179 val_179 179 val_179 +179 val_179 179 val_179 +179 val_179 179 val_179 +180 val_180 180 val_180 +181 val_181 181 val_181 +183 val_183 183 val_183 +186 val_186 186 val_186 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +187 val_187 187 val_187 +189 val_189 189 val_189 +190 val_190 190 val_190 +191 val_191 191 val_191 +191 val_191 191 val_191 +191 val_191 191 val_191 +191 val_191 191 val_191 +192 val_192 192 val_192 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +193 val_193 193 val_193 +194 val_194 194 val_194 +195 val_195 195 val_195 +195 val_195 195 val_195 +195 val_195 195 val_195 +195 val_195 195 val_195 +196 val_196 196 val_196 +197 val_197 197 val_197 +197 val_197 197 val_197 +197 val_197 197 val_197 +197 val_197 197 val_197 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +199 val_199 199 val_199 +200 val_200 200 val_200 +200 val_200 200 val_200 +200 val_200 200 val_200 +200 val_200 200 val_200 +201 val_201 201 val_201 +202 val_202 202 val_202 +203 
val_203 203 val_203 +203 val_203 203 val_203 +203 val_203 203 val_203 +203 val_203 203 val_203 +205 val_205 205 val_205 +205 val_205 205 val_205 +205 val_205 205 val_205 +205 val_205 205 val_205 +207 val_207 207 val_207 +207 val_207 207 val_207 +207 val_207 207 val_207 +207 val_207 207 val_207 +208 val_208 208 val_208 +208 val_208 208 val_208 +208 val_208 208 val_208 +208 val_208 208 val_208 +208 val_208 208 val_208 +208 val_208 208 val_208 +208 val_208 208 val_208 +208 val_208 208 val_208 +208 val_208 208 val_208 +209 val_209 209 val_209 +209 val_209 209 val_209 +209 val_209 209 val_209 +209 val_209 209 val_209 +213 val_213 213 val_213 +213 val_213 213 val_213 +213 val_213 213 val_213 +213 val_213 213 val_213 +214 val_214 214 val_214 +216 val_216 216 val_216 +216 val_216 216 val_216 +216 val_216 216 val_216 +216 val_216 216 val_216 +217 val_217 217 val_217 +217 val_217 217 val_217 +217 val_217 217 val_217 +217 val_217 217 val_217 +218 val_218 218 val_218 +219 val_219 219 val_219 +219 val_219 219 val_219 +219 val_219 219 val_219 +219 val_219 219 val_219 +221 val_221 221 val_221 +221 val_221 221 val_221 +221 val_221 221 val_221 +221 val_221 221 val_221 +222 val_222 222 val_222 +223 val_223 223 val_223 +223 val_223 223 val_223 +223 val_223 223 val_223 +223 val_223 223 val_223 +224 val_224 224 val_224 +224 val_224 224 val_224 +224 val_224 224 val_224 +224 val_224 224 val_224 +226 val_226 226 val_226 +228 val_228 228 val_228 +229 val_229 229 val_229 +229 val_229 229 val_229 +229 val_229 229 val_229 +229 val_229 229 val_229 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +230 val_230 230 val_230 +233 val_233 233 val_233 +233 val_233 233 val_233 +233 val_233 233 val_233 +233 val_233 233 val_233 +235 val_235 235 val_235 +237 val_237 237 val_237 +237 val_237 237 val_237 +237 val_237 237 val_237 +237 val_237 237 val_237 +238 val_238 238 val_238 +238 val_238 238 val_238 +238 val_238 238 val_238 +238 val_238 238 val_238 +239 val_239 239 val_239 +239 val_239 239 val_239 +239 val_239 239 val_239 +239 val_239 239 val_239 +241 val_241 241 val_241 +242 val_242 242 val_242 +242 val_242 242 val_242 +242 val_242 242 val_242 +242 val_242 242 val_242 +244 val_244 244 val_244 +247 val_247 247 val_247 +248 val_248 248 val_248 +249 val_249 249 val_249 +252 val_252 252 val_252 +255 val_255 255 val_255 +255 val_255 255 val_255 +255 val_255 255 val_255 +255 val_255 255 val_255 +256 val_256 256 val_256 +256 val_256 256 val_256 +256 val_256 256 val_256 +256 val_256 256 val_256 +257 val_257 257 val_257 +258 val_258 258 val_258 +260 val_260 260 val_260 +262 val_262 262 val_262 +263 val_263 263 val_263 +265 val_265 265 val_265 +265 val_265 265 val_265 +265 val_265 265 val_265 +265 val_265 265 val_265 +266 val_266 266 val_266 +272 val_272 272 val_272 +272 val_272 272 val_272 +272 val_272 272 val_272 +272 val_272 272 val_272 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 
val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +274 val_274 274 val_274 +275 val_275 275 val_275 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +277 val_277 277 val_277 +278 val_278 278 val_278 +278 val_278 278 val_278 +278 val_278 278 val_278 +278 val_278 278 val_278 +280 val_280 280 val_280 +280 val_280 280 val_280 +280 val_280 280 val_280 +280 val_280 280 val_280 +281 val_281 281 val_281 +281 val_281 281 val_281 +281 val_281 281 val_281 +281 val_281 281 val_281 +282 val_282 282 val_282 +282 val_282 282 val_282 +282 val_282 282 val_282 +282 val_282 282 val_282 +283 val_283 283 val_283 +284 val_284 284 val_284 +285 val_285 285 val_285 +286 val_286 286 val_286 +287 val_287 287 val_287 +288 val_288 288 val_288 +288 val_288 288 val_288 +288 val_288 288 val_288 +288 val_288 288 val_288 +289 val_289 289 val_289 +291 val_291 291 val_291 +292 val_292 292 val_292 +296 val_296 296 val_296 +298 val_298 298 val_298 +298 val_298 298 val_298 +298 val_298 298 val_298 +298 val_298 298 val_298 +298 val_298 298 val_298 +298 val_298 298 val_298 +298 val_298 298 val_298 +298 val_298 298 val_298 +298 val_298 298 val_298 +302 val_302 302 val_302 +305 val_305 305 val_305 +306 val_306 306 val_306 +307 val_307 307 val_307 +307 val_307 307 val_307 +307 val_307 307 val_307 +307 val_307 307 val_307 +308 val_308 308 val_308 +309 val_309 309 val_309 +309 val_309 309 val_309 +309 val_309 309 val_309 +309 val_309 309 val_309 +310 val_310 310 val_310 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +315 val_315 315 val_315 +316 val_316 316 val_316 +316 val_316 316 val_316 +316 val_316 316 val_316 +316 val_316 316 val_316 +316 val_316 316 val_316 +316 val_316 316 val_316 +316 val_316 316 val_316 +316 val_316 316 val_316 +316 val_316 316 val_316 +317 val_317 317 val_317 +317 val_317 317 val_317 +317 val_317 317 val_317 +317 val_317 317 val_317 +318 val_318 318 val_318 +318 val_318 318 val_318 +318 val_318 318 val_318 +318 val_318 318 val_318 +318 val_318 318 val_318 +318 val_318 318 val_318 +318 val_318 318 val_318 +318 val_318 318 val_318 +318 val_318 318 val_318 +321 val_321 321 val_321 +321 val_321 321 val_321 +321 val_321 321 val_321 +321 val_321 321 val_321 +322 val_322 322 val_322 +322 val_322 322 val_322 +322 val_322 322 val_322 +322 val_322 322 val_322 +323 val_323 323 val_323 +325 val_325 325 val_325 +325 val_325 325 val_325 +325 val_325 325 val_325 +325 val_325 325 val_325 +327 val_327 327 val_327 +327 val_327 327 val_327 +327 val_327 327 val_327 +327 val_327 327 val_327 +327 val_327 327 val_327 +327 val_327 327 val_327 +327 val_327 327 val_327 +327 val_327 327 val_327 +327 val_327 327 val_327 +331 val_331 331 val_331 +331 val_331 331 val_331 +331 val_331 331 val_331 +331 val_331 331 val_331 +332 val_332 332 val_332 +333 val_333 333 val_333 +333 val_333 333 val_333 +333 val_333 333 val_333 +333 val_333 333 val_333 +335 val_335 335 val_335 +336 val_336 336 val_336 +338 val_338 338 val_338 +339 val_339 339 val_339 +341 val_341 341 val_341 +342 val_342 342 val_342 +342 val_342 342 val_342 +342 
val_342 342 val_342 +342 val_342 342 val_342 +344 val_344 344 val_344 +344 val_344 344 val_344 +344 val_344 344 val_344 +344 val_344 344 val_344 +345 val_345 345 val_345 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +348 val_348 348 val_348 +351 val_351 351 val_351 +353 val_353 353 val_353 +353 val_353 353 val_353 +353 val_353 353 val_353 +353 val_353 353 val_353 +356 val_356 356 val_356 +360 val_360 360 val_360 +362 val_362 362 val_362 +364 val_364 364 val_364 +365 val_365 365 val_365 +366 val_366 366 val_366 +367 val_367 367 val_367 +367 val_367 367 val_367 +367 val_367 367 val_367 +367 val_367 367 val_367 +368 val_368 368 val_368 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +373 val_373 373 val_373 +374 val_374 374 val_374 +375 val_375 375 val_375 +377 val_377 377 val_377 +378 val_378 378 val_378 +379 val_379 379 val_379 +382 val_382 382 val_382 +382 val_382 382 val_382 +382 val_382 382 val_382 +382 val_382 382 val_382 +384 val_384 384 val_384 +384 val_384 384 val_384 +384 val_384 384 val_384 +384 val_384 384 val_384 +384 val_384 384 val_384 +384 val_384 384 val_384 +384 val_384 384 val_384 +384 val_384 384 val_384 +384 val_384 384 val_384 +386 val_386 386 val_386 +389 val_389 389 val_389 +392 val_392 392 val_392 +393 val_393 393 val_393 +394 val_394 394 val_394 +395 val_395 395 val_395 +395 val_395 395 val_395 +395 val_395 395 val_395 +395 val_395 395 val_395 +396 val_396 396 val_396 +396 val_396 396 val_396 +396 val_396 396 val_396 +396 val_396 396 val_396 +396 val_396 396 val_396 +396 val_396 396 val_396 +396 val_396 396 val_396 +396 val_396 396 val_396 +396 val_396 396 val_396 +397 val_397 397 val_397 +397 val_397 397 val_397 +397 val_397 397 val_397 +397 val_397 397 val_397 +399 val_399 399 val_399 +399 val_399 399 val_399 +399 val_399 399 val_399 +399 val_399 399 val_399 +400 val_400 400 val_400 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +402 val_402 402 val_402 +403 val_403 403 val_403 +403 val_403 403 val_403 +403 val_403 403 val_403 +403 val_403 403 val_403 +403 val_403 403 val_403 +403 val_403 403 val_403 +403 val_403 403 val_403 +403 val_403 403 val_403 +403 val_403 403 val_403 +404 val_404 404 val_404 +404 val_404 404 val_404 +404 val_404 404 val_404 +404 val_404 404 val_404 +406 
val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +407 val_407 407 val_407 +409 val_409 409 val_409 +409 val_409 409 val_409 +409 val_409 409 val_409 +409 val_409 409 val_409 +409 val_409 409 val_409 +409 val_409 409 val_409 +409 val_409 409 val_409 +409 val_409 409 val_409 +409 val_409 409 val_409 +411 val_411 411 val_411 +413 val_413 413 val_413 +413 val_413 413 val_413 +413 val_413 413 val_413 +413 val_413 413 val_413 +414 val_414 414 val_414 +414 val_414 414 val_414 +414 val_414 414 val_414 +414 val_414 414 val_414 +417 val_417 417 val_417 +417 val_417 417 val_417 +417 val_417 417 val_417 +417 val_417 417 val_417 +417 val_417 417 val_417 +417 val_417 417 val_417 +417 val_417 417 val_417 +417 val_417 417 val_417 +417 val_417 417 val_417 +418 val_418 418 val_418 +419 val_419 419 val_419 +421 val_421 421 val_421 +424 val_424 424 val_424 +424 val_424 424 val_424 +424 val_424 424 val_424 +424 val_424 424 val_424 +427 val_427 427 val_427 +429 val_429 429 val_429 +429 val_429 429 val_429 +429 val_429 429 val_429 +429 val_429 429 val_429 +430 val_430 430 val_430 +430 val_430 430 val_430 +430 val_430 430 val_430 +430 val_430 430 val_430 +430 val_430 430 val_430 +430 val_430 430 val_430 +430 val_430 430 val_430 +430 val_430 430 val_430 +430 val_430 430 val_430 +431 val_431 431 val_431 +431 val_431 431 val_431 +431 val_431 431 val_431 +431 val_431 431 val_431 +431 val_431 431 val_431 +431 val_431 431 val_431 +431 val_431 431 val_431 +431 val_431 431 val_431 +431 val_431 431 val_431 +432 val_432 432 val_432 +435 val_435 435 val_435 +436 val_436 436 val_436 +437 val_437 437 val_437 +438 val_438 438 val_438 +438 val_438 438 val_438 +438 val_438 438 val_438 +438 val_438 438 val_438 +438 val_438 438 val_438 +438 val_438 438 val_438 +438 val_438 438 val_438 +438 val_438 438 val_438 +438 val_438 438 val_438 +439 val_439 439 val_439 +439 val_439 439 val_439 +439 val_439 439 val_439 +439 val_439 439 val_439 +443 val_443 443 val_443 +444 val_444 444 val_444 +446 val_446 446 val_446 +448 val_448 448 val_448 +449 val_449 449 val_449 +452 val_452 452 val_452 +453 val_453 453 val_453 +454 val_454 454 val_454 +454 val_454 454 val_454 +454 val_454 454 val_454 +454 val_454 454 val_454 +454 val_454 454 val_454 +454 val_454 454 val_454 +454 val_454 454 val_454 +454 val_454 454 val_454 +454 val_454 454 val_454 +455 val_455 455 val_455 +457 val_457 457 val_457 +458 val_458 458 val_458 +458 val_458 458 val_458 +458 val_458 458 val_458 +458 val_458 458 val_458 +459 val_459 459 val_459 +459 val_459 459 val_459 +459 val_459 459 val_459 +459 val_459 459 val_459 +460 val_460 460 val_460 +462 val_462 462 val_462 +462 val_462 462 val_462 +462 val_462 462 val_462 +462 val_462 462 val_462 +463 val_463 463 val_463 +463 val_463 463 val_463 +463 val_463 463 val_463 +463 val_463 463 val_463 +466 val_466 466 val_466 +466 val_466 466 val_466 +466 val_466 466 val_466 +466 val_466 466 val_466 +466 val_466 466 val_466 +466 val_466 466 val_466 +466 val_466 466 val_466 +466 val_466 466 val_466 +466 val_466 466 val_466 +467 val_467 467 val_467 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 
val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +468 val_468 468 val_468 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +469 val_469 469 val_469 +470 val_470 470 val_470 +472 val_472 472 val_472 +475 val_475 475 val_475 +477 val_477 477 val_477 +478 val_478 478 val_478 +478 val_478 478 val_478 +478 val_478 478 val_478 +478 val_478 478 val_478 +479 val_479 479 val_479 +480 val_480 480 val_480 +480 val_480 480 val_480 +480 val_480 480 val_480 +480 val_480 480 val_480 +480 val_480 480 val_480 +480 val_480 480 val_480 +480 val_480 480 val_480 +480 val_480 480 val_480 +480 val_480 480 val_480 +481 val_481 481 val_481 +482 val_482 482 val_482 +483 val_483 483 val_483 +484 val_484 484 val_484 +485 val_485 485 val_485 +487 val_487 487 val_487 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +489 val_489 489 val_489 +490 val_490 490 val_490 +491 val_491 491 val_491 +492 val_492 492 val_492 +492 val_492 492 val_492 +492 val_492 492 val_492 +492 val_492 492 val_492 +493 val_493 493 val_493 +494 val_494 494 val_494 +495 val_495 495 val_495 +496 val_496 496 val_496 +497 val_497 497 val_497 +498 val_498 498 val_498 +498 val_498 498 val_498 +498 val_498 498 val_498 +498 val_498 498 val_498 +498 val_498 498 val_498 +498 val_498 498 val_498 +498 val_498 498 val_498 +498 val_498 498 val_498 +498 val_498 498 val_498 +PREHOOK: query: insert overwrite table normal_join_results select * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +PREHOOK: Output: default@normal_join_results +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table normal_join_results select * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +POSTHOOK: Output: default@normal_join_results +POSTHOOK: Lineage: normal_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, 
type:string, comment:null), ] +PREHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from normal_join_results +PREHOOK: type: QUERY +PREHOOK: Input: default@normal_join_results +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from normal_join_results +POSTHOOK: type: QUERY +POSTHOOK: Input: default@normal_join_results +#### A masked pattern was here #### +278697 278697 101852390308 101852390308 +PREHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from smb_join_results +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_join_results +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from smb_join_results +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_join_results +#### A masked pattern was here #### +278697 278697 101852390308 101852390308 +PREHOOK: query: explain +insert overwrite table smb_join_results +select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table smb_join_results +select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 48 Data size: 5008 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key > 1000)) (type: boolean) + Statistics: Num rows: 8 Data size: 834 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 8 Data size: 834 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 48 Data size: 5008 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key > 1000)) (type: boolean) + Statistics: Num rows: 8 Data size: 834 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 8 Data size: 834 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 8 Data size: 917 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 917 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 917 Basic stats: COMPLETE 
Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table smb_join_results +select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +PREHOOK: Output: default@smb_join_results +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_join_results +select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +POSTHOOK: Output: default@smb_join_results +POSTHOOK: Lineage: smb_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: explain +insert overwrite table smb_join_results +select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table smb_join_results +select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 48 Data size: 5008 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key > 1000)) (type: boolean) + Statistics: Num rows: 8 Data size: 834 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 8 Data size: 834 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 48 Data size: 5008 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key > 1000)) (type: boolean) + Statistics: Num rows: 8 Data size: 834 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce 
partition columns: key (type: int) + Statistics: Num rows: 8 Data size: 834 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 8 Data size: 917 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 917 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 917 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table smb_join_results +select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +PREHOOK: Output: default@smb_join_results +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_join_results +select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +POSTHOOK: Output: default@smb_join_results +POSTHOOK: Lineage: smb_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: explain +select /*+mapjoin(b,c)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key join smb_bucket4_2 c on b.key = c.key where a.key>1000 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select /*+mapjoin(b,c)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key join smb_bucket4_2 c on b.key = c.key where a.key>1000 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 48 Data size: 5008 Basic stats: COMPLETE Column stats: NONE + Filter 
Operator + predicate: (key is not null and (key > 1000)) (type: boolean) + Statistics: Num rows: 8 Data size: 834 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 8 Data size: 834 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 48 Data size: 5008 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key > 1000)) (type: boolean) + Statistics: Num rows: 8 Data size: 834 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 8 Data size: 834 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 48 Data size: 5008 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key > 1000)) (type: boolean) + Statistics: Num rows: 8 Data size: 834 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 8 Data size: 834 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 17 Data size: 1834 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 17 Data size: 1834 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 17 Data size: 1834 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+mapjoin(b,c)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key join smb_bucket4_2 c on b.key = c.key where a.key>1000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b,c)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key join smb_bucket4_2 c on b.key = c.key where a.key>1000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +#### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out new file mode 100644 index 0000000..4ba1585 --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out @@ -0,0 +1,1260 @@ +PREHOOK: query: CREATE TABLE smb_bucket4_1(key int, value string) CLUSTERED 
BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket4_1 +POSTHOOK: query: CREATE TABLE smb_bucket4_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket4_1 +PREHOOK: query: CREATE TABLE smb_bucket4_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket4_2 +POSTHOOK: query: CREATE TABLE smb_bucket4_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket4_2 +PREHOOK: query: create table smb_join_results(k1 int, v1 string, k2 int, v2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_join_results +POSTHOOK: query: create table smb_join_results(k1 int, v1 string, k2 int, v2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_join_results +PREHOOK: query: create table smb_join_results_empty_bigtable(k1 int, v1 string, k2 int, v2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_join_results_empty_bigtable +POSTHOOK: query: create table smb_join_results_empty_bigtable(k1 int, v1 string, k2 int, v2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_join_results_empty_bigtable +PREHOOK: query: create table normal_join_results(k1 int, v1 string, k2 int, v2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@normal_join_results +POSTHOOK: query: create table normal_join_results(k1 int, v1 string, k2 int, v2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@normal_join_results +PREHOOK: query: load data local inpath '../../data/files/empty1.txt' into table smb_bucket4_1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket4_1 +POSTHOOK: query: load data local inpath '../../data/files/empty1.txt' into table smb_bucket4_1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket4_1 +PREHOOK: query: load data local inpath '../../data/files/empty2.txt' into table smb_bucket4_1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket4_1 +POSTHOOK: query: load data local inpath '../../data/files/empty2.txt' into table smb_bucket4_1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket4_1 +PREHOOK: query: insert overwrite table smb_bucket4_2 +select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@smb_bucket4_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_2 +select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@smb_bucket4_2 +POSTHOOK: Lineage: smb_bucket4_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: smb_bucket4_2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 
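The smb_mapjoin_7.q.out content above and below covers the degenerate case where the bucketed, sorted table smb_bucket4_1 is loaded only with empty files, so every full-outer-join result row is NULL-padded on that side. A minimal HiveQL sketch of the scenario follows; the three SET lines are the session knobs conventionally used to request sort-merge-bucket (SMB) map joins and are an assumption of the sketch, not quoted from this diff. Two things worth noticing in the recorded output: the Spark plans still execute the join in Reducer 2 as a shuffle join, i.e. the mapjoin hint is not converted to a map-side join here, and the recurring "[Error 30017] ... [Error 30015]" lines show the counter-type stats aggregator (selected via hive.stats.dbclass=counter) being skipped, apparently because it was not yet hooked up on the Spark path when these golden files were generated.

    -- Sketch only: the SET lines are assumed conventions for SMB tests,
    -- not taken from this diff; table names are reused from the test.
    SET hive.optimize.bucketmapjoin = true;
    SET hive.optimize.bucketmapjoin.sortedmerge = true;
    SET hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;

    -- Both sides are bucketed AND sorted on the join key into the same
    -- number of buckets, the structural precondition for an SMB join.
    CREATE TABLE smb_bucket4_1 (key INT, value STRING)
      CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
    CREATE TABLE smb_bucket4_2 (key INT, value STRING)
      CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;

    -- smb_bucket4_1 stays empty (only empty files are loaded into it) while
    -- smb_bucket4_2 is filled from src, so the full outer join returns every
    -- smb_bucket4_2 row padded with NULLs on the a side, exactly the
    -- "NULL NULL <key> val_<key>" block recorded below:
    SELECT /*+ MAPJOIN(b) */ *
    FROM smb_bucket4_1 a
    FULL OUTER JOIN smb_bucket4_2 b ON a.key = b.key;
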
+PREHOOK: query: insert overwrite table smb_join_results_empty_bigtable +select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +PREHOOK: Output: default@smb_join_results_empty_bigtable +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_join_results_empty_bigtable +select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +POSTHOOK: Output: default@smb_join_results_empty_bigtable +POSTHOOK: Lineage: smb_join_results_empty_bigtable.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results_empty_bigtable.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results_empty_bigtable.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results_empty_bigtable.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: insert overwrite table smb_join_results_empty_bigtable +select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +PREHOOK: Output: default@smb_join_results_empty_bigtable +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_join_results_empty_bigtable +select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +POSTHOOK: Output: default@smb_join_results_empty_bigtable +POSTHOOK: Lineage: smb_join_results_empty_bigtable.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results_empty_bigtable.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results_empty_bigtable.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results_empty_bigtable.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from smb_join_results_empty_bigtable order by k1, v1, k2, v2 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_join_results_empty_bigtable +#### A masked pattern was here #### +POSTHOOK: query: select * from smb_join_results_empty_bigtable order by k1, v1, k2, v2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_join_results_empty_bigtable +#### A masked pattern was here #### +NULL NULL 0 val_0 +NULL NULL 0 val_0 +NULL NULL 0 val_0 +NULL NULL 2 val_2 +NULL NULL 4 val_4 +NULL NULL 5 val_5 +NULL NULL 5 val_5 +NULL NULL 5 val_5 +NULL NULL 8 val_8 +NULL NULL 9 val_9 +NULL NULL 10 val_10 +NULL NULL 11 val_11 +NULL NULL 12 val_12 +NULL NULL 12 val_12 +NULL NULL 15 val_15 +NULL NULL 15 val_15 +NULL NULL 17 val_17 +NULL NULL 18 val_18 +NULL NULL 18 val_18 +NULL NULL 19 val_19 +NULL NULL 20 val_20 +NULL NULL 
24 val_24 +NULL NULL 24 val_24 +NULL NULL 26 val_26 +NULL NULL 26 val_26 +NULL NULL 27 val_27 +NULL NULL 28 val_28 +NULL NULL 30 val_30 +NULL NULL 33 val_33 +NULL NULL 34 val_34 +NULL NULL 35 val_35 +NULL NULL 35 val_35 +NULL NULL 35 val_35 +NULL NULL 37 val_37 +NULL NULL 37 val_37 +NULL NULL 41 val_41 +NULL NULL 42 val_42 +NULL NULL 42 val_42 +NULL NULL 43 val_43 +NULL NULL 44 val_44 +NULL NULL 47 val_47 +NULL NULL 51 val_51 +NULL NULL 51 val_51 +NULL NULL 53 val_53 +NULL NULL 54 val_54 +NULL NULL 57 val_57 +NULL NULL 58 val_58 +NULL NULL 58 val_58 +NULL NULL 64 val_64 +NULL NULL 65 val_65 +NULL NULL 66 val_66 +NULL NULL 67 val_67 +NULL NULL 67 val_67 +NULL NULL 69 val_69 +NULL NULL 70 val_70 +NULL NULL 70 val_70 +NULL NULL 70 val_70 +NULL NULL 72 val_72 +NULL NULL 72 val_72 +NULL NULL 74 val_74 +NULL NULL 76 val_76 +NULL NULL 76 val_76 +NULL NULL 77 val_77 +NULL NULL 78 val_78 +NULL NULL 80 val_80 +NULL NULL 82 val_82 +NULL NULL 83 val_83 +NULL NULL 83 val_83 +NULL NULL 84 val_84 +NULL NULL 84 val_84 +NULL NULL 85 val_85 +NULL NULL 86 val_86 +NULL NULL 87 val_87 +NULL NULL 90 val_90 +NULL NULL 90 val_90 +NULL NULL 90 val_90 +NULL NULL 92 val_92 +NULL NULL 95 val_95 +NULL NULL 95 val_95 +NULL NULL 96 val_96 +NULL NULL 97 val_97 +NULL NULL 97 val_97 +NULL NULL 98 val_98 +NULL NULL 98 val_98 +NULL NULL 100 val_100 +NULL NULL 100 val_100 +NULL NULL 103 val_103 +NULL NULL 103 val_103 +NULL NULL 104 val_104 +NULL NULL 104 val_104 +NULL NULL 105 val_105 +NULL NULL 111 val_111 +NULL NULL 113 val_113 +NULL NULL 113 val_113 +NULL NULL 114 val_114 +NULL NULL 116 val_116 +NULL NULL 118 val_118 +NULL NULL 118 val_118 +NULL NULL 119 val_119 +NULL NULL 119 val_119 +NULL NULL 119 val_119 +NULL NULL 120 val_120 +NULL NULL 120 val_120 +NULL NULL 125 val_125 +NULL NULL 125 val_125 +NULL NULL 126 val_126 +NULL NULL 128 val_128 +NULL NULL 128 val_128 +NULL NULL 128 val_128 +NULL NULL 129 val_129 +NULL NULL 129 val_129 +NULL NULL 131 val_131 +NULL NULL 133 val_133 +NULL NULL 134 val_134 +NULL NULL 134 val_134 +NULL NULL 136 val_136 +NULL NULL 137 val_137 +NULL NULL 137 val_137 +NULL NULL 138 val_138 +NULL NULL 138 val_138 +NULL NULL 138 val_138 +NULL NULL 138 val_138 +NULL NULL 143 val_143 +NULL NULL 145 val_145 +NULL NULL 146 val_146 +NULL NULL 146 val_146 +NULL NULL 149 val_149 +NULL NULL 149 val_149 +NULL NULL 150 val_150 +NULL NULL 152 val_152 +NULL NULL 152 val_152 +NULL NULL 153 val_153 +NULL NULL 155 val_155 +NULL NULL 156 val_156 +NULL NULL 157 val_157 +NULL NULL 158 val_158 +NULL NULL 160 val_160 +NULL NULL 162 val_162 +NULL NULL 163 val_163 +NULL NULL 164 val_164 +NULL NULL 164 val_164 +NULL NULL 165 val_165 +NULL NULL 165 val_165 +NULL NULL 166 val_166 +NULL NULL 167 val_167 +NULL NULL 167 val_167 +NULL NULL 167 val_167 +NULL NULL 168 val_168 +NULL NULL 169 val_169 +NULL NULL 169 val_169 +NULL NULL 169 val_169 +NULL NULL 169 val_169 +NULL NULL 170 val_170 +NULL NULL 172 val_172 +NULL NULL 172 val_172 +NULL NULL 174 val_174 +NULL NULL 174 val_174 +NULL NULL 175 val_175 +NULL NULL 175 val_175 +NULL NULL 176 val_176 +NULL NULL 176 val_176 +NULL NULL 177 val_177 +NULL NULL 178 val_178 +NULL NULL 179 val_179 +NULL NULL 179 val_179 +NULL NULL 180 val_180 +NULL NULL 181 val_181 +NULL NULL 183 val_183 +NULL NULL 186 val_186 +NULL NULL 187 val_187 +NULL NULL 187 val_187 +NULL NULL 187 val_187 +NULL NULL 189 val_189 +NULL NULL 190 val_190 +NULL NULL 191 val_191 +NULL NULL 191 val_191 +NULL NULL 192 val_192 +NULL NULL 193 val_193 +NULL NULL 193 val_193 +NULL NULL 193 val_193 +NULL NULL 194 val_194 +NULL NULL 
195 val_195 +NULL NULL 195 val_195 +NULL NULL 196 val_196 +NULL NULL 197 val_197 +NULL NULL 197 val_197 +NULL NULL 199 val_199 +NULL NULL 199 val_199 +NULL NULL 199 val_199 +NULL NULL 200 val_200 +NULL NULL 200 val_200 +NULL NULL 201 val_201 +NULL NULL 202 val_202 +NULL NULL 203 val_203 +NULL NULL 203 val_203 +NULL NULL 205 val_205 +NULL NULL 205 val_205 +NULL NULL 207 val_207 +NULL NULL 207 val_207 +NULL NULL 208 val_208 +NULL NULL 208 val_208 +NULL NULL 208 val_208 +NULL NULL 209 val_209 +NULL NULL 209 val_209 +NULL NULL 213 val_213 +NULL NULL 213 val_213 +NULL NULL 214 val_214 +NULL NULL 216 val_216 +NULL NULL 216 val_216 +NULL NULL 217 val_217 +NULL NULL 217 val_217 +NULL NULL 218 val_218 +NULL NULL 219 val_219 +NULL NULL 219 val_219 +NULL NULL 221 val_221 +NULL NULL 221 val_221 +NULL NULL 222 val_222 +NULL NULL 223 val_223 +NULL NULL 223 val_223 +NULL NULL 224 val_224 +NULL NULL 224 val_224 +NULL NULL 226 val_226 +NULL NULL 228 val_228 +NULL NULL 229 val_229 +NULL NULL 229 val_229 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 233 val_233 +NULL NULL 233 val_233 +NULL NULL 235 val_235 +NULL NULL 237 val_237 +NULL NULL 237 val_237 +NULL NULL 238 val_238 +NULL NULL 238 val_238 +NULL NULL 239 val_239 +NULL NULL 239 val_239 +NULL NULL 241 val_241 +NULL NULL 242 val_242 +NULL NULL 242 val_242 +NULL NULL 244 val_244 +NULL NULL 247 val_247 +NULL NULL 248 val_248 +NULL NULL 249 val_249 +NULL NULL 252 val_252 +NULL NULL 255 val_255 +NULL NULL 255 val_255 +NULL NULL 256 val_256 +NULL NULL 256 val_256 +NULL NULL 257 val_257 +NULL NULL 258 val_258 +NULL NULL 260 val_260 +NULL NULL 262 val_262 +NULL NULL 263 val_263 +NULL NULL 265 val_265 +NULL NULL 265 val_265 +NULL NULL 266 val_266 +NULL NULL 272 val_272 +NULL NULL 272 val_272 +NULL NULL 273 val_273 +NULL NULL 273 val_273 +NULL NULL 273 val_273 +NULL NULL 274 val_274 +NULL NULL 275 val_275 +NULL NULL 277 val_277 +NULL NULL 277 val_277 +NULL NULL 277 val_277 +NULL NULL 277 val_277 +NULL NULL 278 val_278 +NULL NULL 278 val_278 +NULL NULL 280 val_280 +NULL NULL 280 val_280 +NULL NULL 281 val_281 +NULL NULL 281 val_281 +NULL NULL 282 val_282 +NULL NULL 282 val_282 +NULL NULL 283 val_283 +NULL NULL 284 val_284 +NULL NULL 285 val_285 +NULL NULL 286 val_286 +NULL NULL 287 val_287 +NULL NULL 288 val_288 +NULL NULL 288 val_288 +NULL NULL 289 val_289 +NULL NULL 291 val_291 +NULL NULL 292 val_292 +NULL NULL 296 val_296 +NULL NULL 298 val_298 +NULL NULL 298 val_298 +NULL NULL 298 val_298 +NULL NULL 302 val_302 +NULL NULL 305 val_305 +NULL NULL 306 val_306 +NULL NULL 307 val_307 +NULL NULL 307 val_307 +NULL NULL 308 val_308 +NULL NULL 309 val_309 +NULL NULL 309 val_309 +NULL NULL 310 val_310 +NULL NULL 311 val_311 +NULL NULL 311 val_311 +NULL NULL 311 val_311 +NULL NULL 315 val_315 +NULL NULL 316 val_316 +NULL NULL 316 val_316 +NULL NULL 316 val_316 +NULL NULL 317 val_317 +NULL NULL 317 val_317 +NULL NULL 318 val_318 +NULL NULL 318 val_318 +NULL NULL 318 val_318 +NULL NULL 321 val_321 +NULL NULL 321 val_321 +NULL NULL 322 val_322 +NULL NULL 322 val_322 +NULL NULL 323 val_323 +NULL NULL 325 val_325 +NULL NULL 325 val_325 +NULL NULL 327 val_327 +NULL NULL 327 val_327 +NULL NULL 327 val_327 +NULL NULL 331 val_331 +NULL NULL 331 val_331 +NULL NULL 332 val_332 +NULL NULL 333 val_333 +NULL NULL 333 val_333 +NULL NULL 335 val_335 +NULL NULL 336 val_336 +NULL NULL 338 val_338 +NULL NULL 339 val_339 +NULL NULL 341 val_341 +NULL NULL 342 val_342 +NULL NULL 342 val_342 +NULL NULL 344 val_344 
+NULL NULL 344 val_344 +NULL NULL 345 val_345 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 351 val_351 +NULL NULL 353 val_353 +NULL NULL 353 val_353 +NULL NULL 356 val_356 +NULL NULL 360 val_360 +NULL NULL 362 val_362 +NULL NULL 364 val_364 +NULL NULL 365 val_365 +NULL NULL 366 val_366 +NULL NULL 367 val_367 +NULL NULL 367 val_367 +NULL NULL 368 val_368 +NULL NULL 369 val_369 +NULL NULL 369 val_369 +NULL NULL 369 val_369 +NULL NULL 373 val_373 +NULL NULL 374 val_374 +NULL NULL 375 val_375 +NULL NULL 377 val_377 +NULL NULL 378 val_378 +NULL NULL 379 val_379 +NULL NULL 382 val_382 +NULL NULL 382 val_382 +NULL NULL 384 val_384 +NULL NULL 384 val_384 +NULL NULL 384 val_384 +NULL NULL 386 val_386 +NULL NULL 389 val_389 +NULL NULL 392 val_392 +NULL NULL 393 val_393 +NULL NULL 394 val_394 +NULL NULL 395 val_395 +NULL NULL 395 val_395 +NULL NULL 396 val_396 +NULL NULL 396 val_396 +NULL NULL 396 val_396 +NULL NULL 397 val_397 +NULL NULL 397 val_397 +NULL NULL 399 val_399 +NULL NULL 399 val_399 +NULL NULL 400 val_400 +NULL NULL 401 val_401 +NULL NULL 401 val_401 +NULL NULL 401 val_401 +NULL NULL 401 val_401 +NULL NULL 401 val_401 +NULL NULL 402 val_402 +NULL NULL 403 val_403 +NULL NULL 403 val_403 +NULL NULL 403 val_403 +NULL NULL 404 val_404 +NULL NULL 404 val_404 +NULL NULL 406 val_406 +NULL NULL 406 val_406 +NULL NULL 406 val_406 +NULL NULL 406 val_406 +NULL NULL 407 val_407 +NULL NULL 409 val_409 +NULL NULL 409 val_409 +NULL NULL 409 val_409 +NULL NULL 411 val_411 +NULL NULL 413 val_413 +NULL NULL 413 val_413 +NULL NULL 414 val_414 +NULL NULL 414 val_414 +NULL NULL 417 val_417 +NULL NULL 417 val_417 +NULL NULL 417 val_417 +NULL NULL 418 val_418 +NULL NULL 419 val_419 +NULL NULL 421 val_421 +NULL NULL 424 val_424 +NULL NULL 424 val_424 +NULL NULL 427 val_427 +NULL NULL 429 val_429 +NULL NULL 429 val_429 +NULL NULL 430 val_430 +NULL NULL 430 val_430 +NULL NULL 430 val_430 +NULL NULL 431 val_431 +NULL NULL 431 val_431 +NULL NULL 431 val_431 +NULL NULL 432 val_432 +NULL NULL 435 val_435 +NULL NULL 436 val_436 +NULL NULL 437 val_437 +NULL NULL 438 val_438 +NULL NULL 438 val_438 +NULL NULL 438 val_438 +NULL NULL 439 val_439 +NULL NULL 439 val_439 +NULL NULL 443 val_443 +NULL NULL 444 val_444 +NULL NULL 446 val_446 +NULL NULL 448 val_448 +NULL NULL 449 val_449 +NULL NULL 452 val_452 +NULL NULL 453 val_453 +NULL NULL 454 val_454 +NULL NULL 454 val_454 +NULL NULL 454 val_454 +NULL NULL 455 val_455 +NULL NULL 457 val_457 +NULL NULL 458 val_458 +NULL NULL 458 val_458 +NULL NULL 459 val_459 +NULL NULL 459 val_459 +NULL NULL 460 val_460 +NULL NULL 462 val_462 +NULL NULL 462 val_462 +NULL NULL 463 val_463 +NULL NULL 463 val_463 +NULL NULL 466 val_466 +NULL NULL 466 val_466 +NULL NULL 466 val_466 +NULL NULL 467 val_467 +NULL NULL 468 val_468 +NULL NULL 468 val_468 +NULL NULL 468 val_468 +NULL NULL 468 val_468 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 470 val_470 +NULL NULL 472 val_472 +NULL NULL 475 val_475 +NULL NULL 477 val_477 +NULL NULL 478 val_478 +NULL NULL 478 val_478 +NULL NULL 479 val_479 +NULL NULL 480 val_480 +NULL NULL 480 val_480 +NULL NULL 480 val_480 +NULL NULL 481 val_481 +NULL NULL 482 val_482 +NULL NULL 483 val_483 +NULL NULL 484 val_484 +NULL NULL 485 val_485 +NULL NULL 487 val_487 +NULL NULL 489 val_489 +NULL NULL 489 val_489 +NULL NULL 489 val_489 +NULL NULL 489 val_489 +NULL NULL 490 val_490 +NULL NULL 491 val_491 +NULL NULL 
492 val_492 +NULL NULL 492 val_492 +NULL NULL 493 val_493 +NULL NULL 494 val_494 +NULL NULL 495 val_495 +NULL NULL 496 val_496 +NULL NULL 497 val_497 +NULL NULL 498 val_498 +NULL NULL 498 val_498 +NULL NULL 498 val_498 +PREHOOK: query: explain +insert overwrite table smb_join_results +select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table smb_join_results +select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table smb_join_results +select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +PREHOOK: Output: default@smb_join_results +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_join_results +select 
/*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +POSTHOOK: Output: default@smb_join_results +POSTHOOK: Lineage: smb_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from smb_join_results order by k1, v1, k2, v2 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_join_results +#### A masked pattern was here #### +POSTHOOK: query: select * from smb_join_results order by k1, v1, k2, v2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_join_results +#### A masked pattern was here #### +NULL NULL 0 val_0 +NULL NULL 0 val_0 +NULL NULL 0 val_0 +NULL NULL 2 val_2 +NULL NULL 4 val_4 +NULL NULL 5 val_5 +NULL NULL 5 val_5 +NULL NULL 5 val_5 +NULL NULL 8 val_8 +NULL NULL 9 val_9 +NULL NULL 10 val_10 +NULL NULL 11 val_11 +NULL NULL 12 val_12 +NULL NULL 12 val_12 +NULL NULL 15 val_15 +NULL NULL 15 val_15 +NULL NULL 17 val_17 +NULL NULL 18 val_18 +NULL NULL 18 val_18 +NULL NULL 19 val_19 +NULL NULL 20 val_20 +NULL NULL 24 val_24 +NULL NULL 24 val_24 +NULL NULL 26 val_26 +NULL NULL 26 val_26 +NULL NULL 27 val_27 +NULL NULL 28 val_28 +NULL NULL 30 val_30 +NULL NULL 33 val_33 +NULL NULL 34 val_34 +NULL NULL 35 val_35 +NULL NULL 35 val_35 +NULL NULL 35 val_35 +NULL NULL 37 val_37 +NULL NULL 37 val_37 +NULL NULL 41 val_41 +NULL NULL 42 val_42 +NULL NULL 42 val_42 +NULL NULL 43 val_43 +NULL NULL 44 val_44 +NULL NULL 47 val_47 +NULL NULL 51 val_51 +NULL NULL 51 val_51 +NULL NULL 53 val_53 +NULL NULL 54 val_54 +NULL NULL 57 val_57 +NULL NULL 58 val_58 +NULL NULL 58 val_58 +NULL NULL 64 val_64 +NULL NULL 65 val_65 +NULL NULL 66 val_66 +NULL NULL 67 val_67 +NULL NULL 67 val_67 +NULL NULL 69 val_69 +NULL NULL 70 val_70 +NULL NULL 70 val_70 +NULL NULL 70 val_70 +NULL NULL 72 val_72 +NULL NULL 72 val_72 +NULL NULL 74 val_74 +NULL NULL 76 val_76 +NULL NULL 76 val_76 +NULL NULL 77 val_77 +NULL NULL 78 val_78 +NULL NULL 80 val_80 +NULL NULL 82 val_82 +NULL NULL 83 val_83 +NULL NULL 83 val_83 +NULL NULL 84 val_84 +NULL NULL 84 val_84 +NULL NULL 85 val_85 +NULL NULL 86 val_86 +NULL NULL 87 val_87 +NULL NULL 90 val_90 +NULL NULL 90 val_90 +NULL NULL 90 val_90 +NULL NULL 92 val_92 +NULL NULL 95 val_95 +NULL NULL 95 val_95 +NULL NULL 96 val_96 +NULL NULL 97 val_97 +NULL NULL 97 val_97 +NULL NULL 98 val_98 +NULL NULL 98 val_98 +NULL NULL 100 val_100 +NULL NULL 100 val_100 +NULL NULL 103 val_103 +NULL NULL 103 val_103 +NULL NULL 104 val_104 +NULL NULL 104 val_104 +NULL NULL 105 val_105 +NULL NULL 111 val_111 +NULL NULL 113 val_113 +NULL NULL 113 val_113 +NULL NULL 114 val_114 +NULL NULL 116 val_116 +NULL NULL 118 val_118 +NULL NULL 118 val_118 +NULL NULL 119 val_119 +NULL NULL 119 val_119 +NULL NULL 119 val_119 +NULL NULL 120 val_120 +NULL NULL 120 val_120 +NULL NULL 125 val_125 +NULL NULL 125 val_125 +NULL NULL 126 val_126 +NULL NULL 128 val_128 +NULL NULL 128 val_128 +NULL NULL 128 val_128 +NULL NULL 129 val_129 +NULL NULL 129 val_129 +NULL NULL 131 val_131 +NULL NULL 133 val_133 +NULL NULL 134 val_134 +NULL NULL 134 val_134 +NULL NULL 136 val_136 +NULL NULL 137 val_137 
+NULL NULL 137 val_137 +NULL NULL 138 val_138 +NULL NULL 138 val_138 +NULL NULL 138 val_138 +NULL NULL 138 val_138 +NULL NULL 143 val_143 +NULL NULL 145 val_145 +NULL NULL 146 val_146 +NULL NULL 146 val_146 +NULL NULL 149 val_149 +NULL NULL 149 val_149 +NULL NULL 150 val_150 +NULL NULL 152 val_152 +NULL NULL 152 val_152 +NULL NULL 153 val_153 +NULL NULL 155 val_155 +NULL NULL 156 val_156 +NULL NULL 157 val_157 +NULL NULL 158 val_158 +NULL NULL 160 val_160 +NULL NULL 162 val_162 +NULL NULL 163 val_163 +NULL NULL 164 val_164 +NULL NULL 164 val_164 +NULL NULL 165 val_165 +NULL NULL 165 val_165 +NULL NULL 166 val_166 +NULL NULL 167 val_167 +NULL NULL 167 val_167 +NULL NULL 167 val_167 +NULL NULL 168 val_168 +NULL NULL 169 val_169 +NULL NULL 169 val_169 +NULL NULL 169 val_169 +NULL NULL 169 val_169 +NULL NULL 170 val_170 +NULL NULL 172 val_172 +NULL NULL 172 val_172 +NULL NULL 174 val_174 +NULL NULL 174 val_174 +NULL NULL 175 val_175 +NULL NULL 175 val_175 +NULL NULL 176 val_176 +NULL NULL 176 val_176 +NULL NULL 177 val_177 +NULL NULL 178 val_178 +NULL NULL 179 val_179 +NULL NULL 179 val_179 +NULL NULL 180 val_180 +NULL NULL 181 val_181 +NULL NULL 183 val_183 +NULL NULL 186 val_186 +NULL NULL 187 val_187 +NULL NULL 187 val_187 +NULL NULL 187 val_187 +NULL NULL 189 val_189 +NULL NULL 190 val_190 +NULL NULL 191 val_191 +NULL NULL 191 val_191 +NULL NULL 192 val_192 +NULL NULL 193 val_193 +NULL NULL 193 val_193 +NULL NULL 193 val_193 +NULL NULL 194 val_194 +NULL NULL 195 val_195 +NULL NULL 195 val_195 +NULL NULL 196 val_196 +NULL NULL 197 val_197 +NULL NULL 197 val_197 +NULL NULL 199 val_199 +NULL NULL 199 val_199 +NULL NULL 199 val_199 +NULL NULL 200 val_200 +NULL NULL 200 val_200 +NULL NULL 201 val_201 +NULL NULL 202 val_202 +NULL NULL 203 val_203 +NULL NULL 203 val_203 +NULL NULL 205 val_205 +NULL NULL 205 val_205 +NULL NULL 207 val_207 +NULL NULL 207 val_207 +NULL NULL 208 val_208 +NULL NULL 208 val_208 +NULL NULL 208 val_208 +NULL NULL 209 val_209 +NULL NULL 209 val_209 +NULL NULL 213 val_213 +NULL NULL 213 val_213 +NULL NULL 214 val_214 +NULL NULL 216 val_216 +NULL NULL 216 val_216 +NULL NULL 217 val_217 +NULL NULL 217 val_217 +NULL NULL 218 val_218 +NULL NULL 219 val_219 +NULL NULL 219 val_219 +NULL NULL 221 val_221 +NULL NULL 221 val_221 +NULL NULL 222 val_222 +NULL NULL 223 val_223 +NULL NULL 223 val_223 +NULL NULL 224 val_224 +NULL NULL 224 val_224 +NULL NULL 226 val_226 +NULL NULL 228 val_228 +NULL NULL 229 val_229 +NULL NULL 229 val_229 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 233 val_233 +NULL NULL 233 val_233 +NULL NULL 235 val_235 +NULL NULL 237 val_237 +NULL NULL 237 val_237 +NULL NULL 238 val_238 +NULL NULL 238 val_238 +NULL NULL 239 val_239 +NULL NULL 239 val_239 +NULL NULL 241 val_241 +NULL NULL 242 val_242 +NULL NULL 242 val_242 +NULL NULL 244 val_244 +NULL NULL 247 val_247 +NULL NULL 248 val_248 +NULL NULL 249 val_249 +NULL NULL 252 val_252 +NULL NULL 255 val_255 +NULL NULL 255 val_255 +NULL NULL 256 val_256 +NULL NULL 256 val_256 +NULL NULL 257 val_257 +NULL NULL 258 val_258 +NULL NULL 260 val_260 +NULL NULL 262 val_262 +NULL NULL 263 val_263 +NULL NULL 265 val_265 +NULL NULL 265 val_265 +NULL NULL 266 val_266 +NULL NULL 272 val_272 +NULL NULL 272 val_272 +NULL NULL 273 val_273 +NULL NULL 273 val_273 +NULL NULL 273 val_273 +NULL NULL 274 val_274 +NULL NULL 275 val_275 +NULL NULL 277 val_277 +NULL NULL 277 val_277 +NULL NULL 277 val_277 +NULL NULL 277 val_277 +NULL NULL 278 val_278 +NULL NULL 
278 val_278 +NULL NULL 280 val_280 +NULL NULL 280 val_280 +NULL NULL 281 val_281 +NULL NULL 281 val_281 +NULL NULL 282 val_282 +NULL NULL 282 val_282 +NULL NULL 283 val_283 +NULL NULL 284 val_284 +NULL NULL 285 val_285 +NULL NULL 286 val_286 +NULL NULL 287 val_287 +NULL NULL 288 val_288 +NULL NULL 288 val_288 +NULL NULL 289 val_289 +NULL NULL 291 val_291 +NULL NULL 292 val_292 +NULL NULL 296 val_296 +NULL NULL 298 val_298 +NULL NULL 298 val_298 +NULL NULL 298 val_298 +NULL NULL 302 val_302 +NULL NULL 305 val_305 +NULL NULL 306 val_306 +NULL NULL 307 val_307 +NULL NULL 307 val_307 +NULL NULL 308 val_308 +NULL NULL 309 val_309 +NULL NULL 309 val_309 +NULL NULL 310 val_310 +NULL NULL 311 val_311 +NULL NULL 311 val_311 +NULL NULL 311 val_311 +NULL NULL 315 val_315 +NULL NULL 316 val_316 +NULL NULL 316 val_316 +NULL NULL 316 val_316 +NULL NULL 317 val_317 +NULL NULL 317 val_317 +NULL NULL 318 val_318 +NULL NULL 318 val_318 +NULL NULL 318 val_318 +NULL NULL 321 val_321 +NULL NULL 321 val_321 +NULL NULL 322 val_322 +NULL NULL 322 val_322 +NULL NULL 323 val_323 +NULL NULL 325 val_325 +NULL NULL 325 val_325 +NULL NULL 327 val_327 +NULL NULL 327 val_327 +NULL NULL 327 val_327 +NULL NULL 331 val_331 +NULL NULL 331 val_331 +NULL NULL 332 val_332 +NULL NULL 333 val_333 +NULL NULL 333 val_333 +NULL NULL 335 val_335 +NULL NULL 336 val_336 +NULL NULL 338 val_338 +NULL NULL 339 val_339 +NULL NULL 341 val_341 +NULL NULL 342 val_342 +NULL NULL 342 val_342 +NULL NULL 344 val_344 +NULL NULL 344 val_344 +NULL NULL 345 val_345 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 351 val_351 +NULL NULL 353 val_353 +NULL NULL 353 val_353 +NULL NULL 356 val_356 +NULL NULL 360 val_360 +NULL NULL 362 val_362 +NULL NULL 364 val_364 +NULL NULL 365 val_365 +NULL NULL 366 val_366 +NULL NULL 367 val_367 +NULL NULL 367 val_367 +NULL NULL 368 val_368 +NULL NULL 369 val_369 +NULL NULL 369 val_369 +NULL NULL 369 val_369 +NULL NULL 373 val_373 +NULL NULL 374 val_374 +NULL NULL 375 val_375 +NULL NULL 377 val_377 +NULL NULL 378 val_378 +NULL NULL 379 val_379 +NULL NULL 382 val_382 +NULL NULL 382 val_382 +NULL NULL 384 val_384 +NULL NULL 384 val_384 +NULL NULL 384 val_384 +NULL NULL 386 val_386 +NULL NULL 389 val_389 +NULL NULL 392 val_392 +NULL NULL 393 val_393 +NULL NULL 394 val_394 +NULL NULL 395 val_395 +NULL NULL 395 val_395 +NULL NULL 396 val_396 +NULL NULL 396 val_396 +NULL NULL 396 val_396 +NULL NULL 397 val_397 +NULL NULL 397 val_397 +NULL NULL 399 val_399 +NULL NULL 399 val_399 +NULL NULL 400 val_400 +NULL NULL 401 val_401 +NULL NULL 401 val_401 +NULL NULL 401 val_401 +NULL NULL 401 val_401 +NULL NULL 401 val_401 +NULL NULL 402 val_402 +NULL NULL 403 val_403 +NULL NULL 403 val_403 +NULL NULL 403 val_403 +NULL NULL 404 val_404 +NULL NULL 404 val_404 +NULL NULL 406 val_406 +NULL NULL 406 val_406 +NULL NULL 406 val_406 +NULL NULL 406 val_406 +NULL NULL 407 val_407 +NULL NULL 409 val_409 +NULL NULL 409 val_409 +NULL NULL 409 val_409 +NULL NULL 411 val_411 +NULL NULL 413 val_413 +NULL NULL 413 val_413 +NULL NULL 414 val_414 +NULL NULL 414 val_414 +NULL NULL 417 val_417 +NULL NULL 417 val_417 +NULL NULL 417 val_417 +NULL NULL 418 val_418 +NULL NULL 419 val_419 +NULL NULL 421 val_421 +NULL NULL 424 val_424 +NULL NULL 424 val_424 +NULL NULL 427 val_427 +NULL NULL 429 val_429 +NULL NULL 429 val_429 +NULL NULL 430 val_430 +NULL NULL 430 val_430 +NULL NULL 430 val_430 +NULL NULL 431 val_431 +NULL NULL 431 val_431 +NULL NULL 431 val_431 +NULL NULL 432 val_432 
+NULL NULL 435 val_435 +NULL NULL 436 val_436 +NULL NULL 437 val_437 +NULL NULL 438 val_438 +NULL NULL 438 val_438 +NULL NULL 438 val_438 +NULL NULL 439 val_439 +NULL NULL 439 val_439 +NULL NULL 443 val_443 +NULL NULL 444 val_444 +NULL NULL 446 val_446 +NULL NULL 448 val_448 +NULL NULL 449 val_449 +NULL NULL 452 val_452 +NULL NULL 453 val_453 +NULL NULL 454 val_454 +NULL NULL 454 val_454 +NULL NULL 454 val_454 +NULL NULL 455 val_455 +NULL NULL 457 val_457 +NULL NULL 458 val_458 +NULL NULL 458 val_458 +NULL NULL 459 val_459 +NULL NULL 459 val_459 +NULL NULL 460 val_460 +NULL NULL 462 val_462 +NULL NULL 462 val_462 +NULL NULL 463 val_463 +NULL NULL 463 val_463 +NULL NULL 466 val_466 +NULL NULL 466 val_466 +NULL NULL 466 val_466 +NULL NULL 467 val_467 +NULL NULL 468 val_468 +NULL NULL 468 val_468 +NULL NULL 468 val_468 +NULL NULL 468 val_468 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 470 val_470 +NULL NULL 472 val_472 +NULL NULL 475 val_475 +NULL NULL 477 val_477 +NULL NULL 478 val_478 +NULL NULL 478 val_478 +NULL NULL 479 val_479 +NULL NULL 480 val_480 +NULL NULL 480 val_480 +NULL NULL 480 val_480 +NULL NULL 481 val_481 +NULL NULL 482 val_482 +NULL NULL 483 val_483 +NULL NULL 484 val_484 +NULL NULL 485 val_485 +NULL NULL 487 val_487 +NULL NULL 489 val_489 +NULL NULL 489 val_489 +NULL NULL 489 val_489 +NULL NULL 489 val_489 +NULL NULL 490 val_490 +NULL NULL 491 val_491 +NULL NULL 492 val_492 +NULL NULL 492 val_492 +NULL NULL 493 val_493 +NULL NULL 494 val_494 +NULL NULL 495 val_495 +NULL NULL 496 val_496 +NULL NULL 497 val_497 +NULL NULL 498 val_498 +NULL NULL 498 val_498 +NULL NULL 498 val_498 +PREHOOK: query: insert overwrite table normal_join_results select * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +PREHOOK: Output: default@normal_join_results +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table normal_join_results select * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +POSTHOOK: Output: default@normal_join_results +POSTHOOK: Lineage: normal_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: normal_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from normal_join_results +PREHOOK: type: QUERY +PREHOOK: Input: default@normal_join_results +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from normal_join_results +POSTHOOK: type: QUERY +POSTHOOK: Input: default@normal_join_results +#### A masked pattern was here #### +0 130091 0 36210398070 +PREHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from 
smb_join_results +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_join_results +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from smb_join_results +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_join_results +#### A masked pattern was here #### +0 130091 0 36210398070 +PREHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from smb_join_results_empty_bigtable +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_join_results_empty_bigtable +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from smb_join_results_empty_bigtable +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_join_results_empty_bigtable +#### A masked pattern was here #### +0 130091 0 36210398070 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_8.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_8.q.out new file mode 100644 index 0000000..aada65b --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_8.q.out @@ -0,0 +1,487 @@ +PREHOOK: query: create table smb_bucket_input (key int, value string) stored as rcfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket_input +POSTHOOK: query: create table smb_bucket_input (key int, value string) stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket_input +PREHOOK: query: load data local inpath '../../data/files/smb_bucket_input.rc' into table smb_bucket_input +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@smb_bucket_input +POSTHOOK: query: load data local inpath '../../data/files/smb_bucket_input.rc' into table smb_bucket_input +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@smb_bucket_input +PREHOOK: query: CREATE TABLE smb_bucket4_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket4_1 +POSTHOOK: query: CREATE TABLE smb_bucket4_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket4_1 +PREHOOK: query: CREATE TABLE smb_bucket4_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket4_2 +POSTHOOK: query: CREATE TABLE smb_bucket4_2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket4_2 +PREHOOK: query: CREATE TABLE smb_bucket4_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smb_bucket4_3 +POSTHOOK: query: CREATE TABLE smb_bucket4_3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smb_bucket4_3 +PREHOOK: query: insert overwrite table smb_bucket4_1 select * from smb_bucket_input where key=4 or key=2000 or key=4000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: 
default@smb_bucket4_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_1 select * from smb_bucket_input where key=4 or key=2000 or key=4000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_1 +POSTHOOK: Lineage: smb_bucket4_1.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_1.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: insert overwrite table smb_bucket4_2 select * from smb_bucket_input where key=484 or key=3000 or key=5000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_2 select * from smb_bucket_input where key=484 or key=3000 or key=5000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_2 +POSTHOOK: Lineage: smb_bucket4_2.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_2.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +#### A masked pattern was here #### +4 val_356 NULL NULL +NULL NULL 484 val_169 +2000 val_169 NULL NULL +NULL NULL 3000 val_169 +4000 val_125 NULL NULL +NULL NULL 5000 val_125 +PREHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +#### A masked pattern was here #### +4 val_356 NULL NULL +NULL NULL 484 val_169 +2000 val_169 NULL NULL +NULL NULL 3000 val_169 +4000 val_125 NULL NULL +NULL NULL 5000 val_125 +PREHOOK: query: insert overwrite table smb_bucket4_1 select * from smb_bucket_input where key=2000 or key=4000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_1 select * from smb_bucket_input where key=2000 or key=4000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_1 +POSTHOOK: Lineage: smb_bucket4_1.key SIMPLE 
[(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_1.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: insert overwrite table smb_bucket4_2 select * from smb_bucket_input where key=3000 or key=5000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_2 select * from smb_bucket_input where key=3000 or key=5000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_2 +POSTHOOK: Lineage: smb_bucket4_2.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_2.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +#### A masked pattern was here #### +2000 val_169 NULL NULL +NULL NULL 3000 val_169 +4000 val_125 NULL NULL +NULL NULL 5000 val_125 +PREHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +#### A masked pattern was here #### +2000 val_169 NULL NULL +NULL NULL 3000 val_169 +4000 val_125 NULL NULL +NULL NULL 5000 val_125 +PREHOOK: query: insert overwrite table smb_bucket4_1 select * from smb_bucket_input where key=4000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_1 select * from smb_bucket_input where key=4000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_1 +POSTHOOK: Lineage: smb_bucket4_1.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_1.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: insert overwrite table smb_bucket4_2 select * from smb_bucket_input where key=5000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert 
overwrite table smb_bucket4_2 select * from smb_bucket_input where key=5000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_2 +POSTHOOK: Lineage: smb_bucket4_2.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_2.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +#### A masked pattern was here #### +4000 val_125 NULL NULL +NULL NULL 5000 val_125 +PREHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +#### A masked pattern was here #### +4000 val_125 NULL NULL +NULL NULL 5000 val_125 +PREHOOK: query: insert overwrite table smb_bucket4_1 select * from smb_bucket_input where key=1000 or key=4000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_1 select * from smb_bucket_input where key=1000 or key=4000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_1 +POSTHOOK: Lineage: smb_bucket4_1.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_1.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: insert overwrite table smb_bucket4_2 select * from smb_bucket_input where key=1000 or key=5000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_2 select * from smb_bucket_input where key=1000 or key=5000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_2 +POSTHOOK: Lineage: smb_bucket4_2.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_2.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +#### A masked pattern was here #### 
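The full outer joins in this file exercise Hive's sort-merge bucket (SMB) map join over single-bucket sorted tables: every key present on only one side contributes exactly one NULL-padded row, as the result sets above and below show. The driving smb_mapjoin_8.q script is not part of this diff; tests in this family conventionally enable the SMB machinery with a preamble along these lines (a sketch of the usual session flags, not the verbatim script):

    -- assumed test preamble; the actual .q file is not shown in this diff
    set hive.optimize.bucketmapjoin=true;
    set hive.optimize.bucketmapjoin.sortedmerge=true;
    set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;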
+POSTHOOK: query: select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +#### A masked pattern was here #### +1000 val_1000 1000 val_1000 +4000 val_125 NULL NULL +NULL NULL 5000 val_125 +PREHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +#### A masked pattern was here #### +1000 val_1000 1000 val_1000 +4000 val_125 NULL NULL +NULL NULL 5000 val_125 +PREHOOK: query: insert overwrite table smb_bucket4_1 select * from smb_bucket_input where key=1000 or key=4000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_1 select * from smb_bucket_input where key=1000 or key=4000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_1 +POSTHOOK: Lineage: smb_bucket4_1.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_1.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: insert overwrite table smb_bucket4_2 select * from smb_bucket_input where key=1000 or key=5000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_2 select * from smb_bucket_input where key=1000 or key=5000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_2 +POSTHOOK: Lineage: smb_bucket4_2.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_2.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: insert overwrite table smb_bucket4_3 select * from smb_bucket_input where key=1000 or key=5000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_3 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_3 select * from smb_bucket_input where key=1000 or key=5000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_3 +POSTHOOK: Lineage: smb_bucket4_3.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_3.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: 
select /*+mapjoin(b,c)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +full outer join smb_bucket4_3 c on a.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +PREHOOK: Input: default@smb_bucket4_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b,c)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +full outer join smb_bucket4_3 c on a.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +POSTHOOK: Input: default@smb_bucket4_3 +#### A masked pattern was here #### +1000 val_1000 1000 val_1000 1000 val_1000 +4000 val_125 NULL NULL NULL NULL +NULL NULL 5000 val_125 NULL NULL +NULL NULL NULL NULL 5000 val_125 +PREHOOK: query: insert overwrite table smb_bucket4_1 select * from smb_bucket_input where key=1000 or key=4000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_1 select * from smb_bucket_input where key=1000 or key=4000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_1 +POSTHOOK: Lineage: smb_bucket4_1.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_1.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: insert overwrite table smb_bucket4_2 select * from smb_bucket_input where key=1000 or key=5000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_2 select * from smb_bucket_input where key=1000 or key=5000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_2 +POSTHOOK: Lineage: smb_bucket4_2.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_2.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: insert overwrite table smb_bucket4_3 select * from smb_bucket_input where key=1000 or key=4000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_3 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_3 select * from smb_bucket_input where key=1000 or key=4000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_3 +POSTHOOK: Lineage: smb_bucket4_3.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_3.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select /*+mapjoin(b,c)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key 
+full outer join smb_bucket4_3 c on a.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +PREHOOK: Input: default@smb_bucket4_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b,c)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +full outer join smb_bucket4_3 c on a.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +POSTHOOK: Input: default@smb_bucket4_3 +#### A masked pattern was here #### +1000 val_1000 1000 val_1000 1000 val_1000 +4000 val_125 NULL NULL 4000 val_125 +NULL NULL 5000 val_125 NULL NULL +PREHOOK: query: insert overwrite table smb_bucket4_1 select * from smb_bucket_input where key=4000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_1 select * from smb_bucket_input where key=4000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_1 +POSTHOOK: Lineage: smb_bucket4_1.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_1.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: insert overwrite table smb_bucket4_2 select * from smb_bucket_input where key=5000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_2 select * from smb_bucket_input where key=5000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_2 +POSTHOOK: Lineage: smb_bucket4_2.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_2.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: insert overwrite table smb_bucket4_3 select * from smb_bucket_input where key=4000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_3 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_3 select * from smb_bucket_input where key=4000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_3 +POSTHOOK: Lineage: smb_bucket4_3.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_3.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select /*+mapjoin(b,c)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +full outer join smb_bucket4_3 c on a.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +PREHOOK: Input: default@smb_bucket4_3 +#### A masked 
pattern was here #### +POSTHOOK: query: select /*+mapjoin(b,c)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +full outer join smb_bucket4_3 c on a.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +POSTHOOK: Input: default@smb_bucket4_3 +#### A masked pattern was here #### +4000 val_125 NULL NULL 4000 val_125 +NULL NULL 5000 val_125 NULL NULL +PREHOOK: query: insert overwrite table smb_bucket4_1 select * from smb_bucket_input where key=00000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_1 select * from smb_bucket_input where key=00000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_1 +POSTHOOK: Lineage: smb_bucket4_1.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_1.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: insert overwrite table smb_bucket4_2 select * from smb_bucket_input where key=4000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_2 select * from smb_bucket_input where key=4000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_2 +POSTHOOK: Lineage: smb_bucket4_2.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_2.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: insert overwrite table smb_bucket4_3 select * from smb_bucket_input where key=5000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_3 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_3 select * from smb_bucket_input where key=5000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_3 +POSTHOOK: Lineage: smb_bucket4_3.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_3.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select /*+mapjoin(b,c)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +full outer join smb_bucket4_3 c on a.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +PREHOOK: Input: default@smb_bucket4_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b,c)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +full outer join smb_bucket4_3 c on a.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +POSTHOOK: Input: default@smb_bucket4_3 +#### A masked pattern was here #### +NULL NULL 4000 val_125 NULL NULL +NULL NULL NULL NULL 5000 val_125 +PREHOOK: query: insert overwrite table smb_bucket4_1 select * from smb_bucket_input where key=1000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_1 select * from smb_bucket_input where key=1000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_1 +POSTHOOK: Lineage: smb_bucket4_1.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_1.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: insert overwrite table smb_bucket4_2 select * from smb_bucket_input where key=4000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_2 select * from smb_bucket_input where key=4000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_2 +POSTHOOK: Lineage: smb_bucket4_2.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_2.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: insert overwrite table smb_bucket4_3 select * from smb_bucket_input where key=5000 +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket_input +PREHOOK: Output: default@smb_bucket4_3 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table smb_bucket4_3 select * from smb_bucket_input where key=5000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket_input +POSTHOOK: Output: default@smb_bucket4_3 +POSTHOOK: Lineage: smb_bucket4_3.key SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_bucket4_3.value SIMPLE [(smb_bucket_input)smb_bucket_input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select /*+mapjoin(b,c)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +full outer join smb_bucket4_3 c on a.key=c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@smb_bucket4_1 +PREHOOK: Input: default@smb_bucket4_2 +PREHOOK: Input: default@smb_bucket4_3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+mapjoin(b,c)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key +full outer join smb_bucket4_3 c on a.key=c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smb_bucket4_1 +POSTHOOK: Input: default@smb_bucket4_2 +POSTHOOK: Input: default@smb_bucket4_3 +#### A masked pattern was here #### +1000 val_1000 NULL NULL NULL NULL +NULL NULL 4000 val_125 NULL NULL +NULL NULL NULL NULL 5000 val_125 diff 
--git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out new file mode 100644 index 0000000..32c3818 --- /dev/null +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out @@ -0,0 +1,152 @@ +PREHOOK: query: drop table table_desc1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table table_desc1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table table_desc2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table table_desc2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table table_desc1(key string, value string) clustered by (key) sorted by (key DESC) into 1 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_desc1 +POSTHOOK: query: create table table_desc1(key string, value string) clustered by (key) sorted by (key DESC) into 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_desc1 +PREHOOK: query: create table table_desc2(key string, value string) clustered by (key) sorted by (key DESC) into 1 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_desc2 +POSTHOOK: query: create table table_desc2(key string, value string) clustered by (key) sorted by (key DESC) into 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_desc2 +PREHOOK: query: insert overwrite table table_desc1 select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@table_desc1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table table_desc1 select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@table_desc1 +POSTHOOK: Lineage: table_desc1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: table_desc1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table table_desc2 select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@table_desc2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table table_desc2 select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@table_desc2 +POSTHOOK: Lineage: table_desc2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: table_desc2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- The columns of the tables above are sorted in same descending order. +-- So, sort merge join should be performed + +explain +select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key where a.key < 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- The columns of the tables above are sorted in same descending order. 
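Hive's sort-merge conversion requires the join keys to be sorted in the same direction in both tables, and a DESC/DESC pair satisfies that just as ASC/ASC does. Condensed from the DDL and query recorded above, the scenario under test is:

    create table table_desc1 (key string, value string)
      clustered by (key) sorted by (key DESC) into 1 buckets;
    create table table_desc2 (key string, value string)
      clustered by (key) sorted by (key DESC) into 1 buckets;
    -- both sides sorted DESC on the join key, so the SMB conversion is legal:
    select /*+ mapjoin(b) */ count(*)
    from table_desc1 a join table_desc2 b on a.key = b.key
    where a.key < 10;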
+-- So, sort merge join should be performed + +explain +select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key where a.key < 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key < 10)) (type: boolean) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key < 10)) (type: boolean) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 9 Data size: 991 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 9 Data size: 991 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key where a.key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@table_desc1 +PREHOOK: Input: default@table_desc2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key where a.key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table_desc1 +POSTHOOK: Input: default@table_desc2 +#### A masked pattern was here #### +22 diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out 
ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out new file mode 100644 index 0000000..ae08516 --- /dev/null +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out @@ -0,0 +1,162 @@ +PREHOOK: query: drop table table_desc1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table table_desc1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table table_desc2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table table_desc2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table table_desc1(key string, value string) clustered by (key, value) +sorted by (key DESC, value DESC) into 1 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_desc1 +POSTHOOK: query: create table table_desc1(key string, value string) clustered by (key, value) +sorted by (key DESC, value DESC) into 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_desc1 +PREHOOK: query: create table table_desc2(key string, value string) clustered by (key, value) +sorted by (key DESC, value DESC) into 1 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_desc2 +POSTHOOK: query: create table table_desc2(key string, value string) clustered by (key, value) +sorted by (key DESC, value DESC) into 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_desc2 +PREHOOK: query: insert overwrite table table_desc1 select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@table_desc1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table table_desc1 select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@table_desc1 +POSTHOOK: Lineage: table_desc1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: table_desc1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table table_desc2 select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@table_desc2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table table_desc2 select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@table_desc2 +POSTHOOK: Lineage: table_desc2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: table_desc2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- The columns of the tables above are sorted in same order. +-- descending followed by descending +-- So, sort merge join should be performed + +explain +select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b +on a.key=b.key and a.value=b.value where a.key < 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- The columns of the tables above are sorted in same order. 
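The two-column variant applies the same direction check per key column; condensed, the declaration shared by both tables here is:

    create table table_desc1 (key string, value string)
      clustered by (key, value) sorted by (key DESC, value DESC) into 1 buckets;
    -- table_desc2 is declared identically, so (key, value) agree in sort
    -- direction column by column and the SMB conversion remains applicable.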
+-- descending followed by descending +-- So, sort merge join should be performed + +explain +select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b +on a.key=b.key and a.value=b.value where a.key < 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key is not null and value is not null) and (key < 10)) (type: boolean) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key is not null and value is not null) and (key < 10)) (type: boolean) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 2 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 2 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b +on a.key=b.key and a.value=b.value where a.key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@table_desc1 +PREHOOK: Input: default@table_desc2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b +on a.key=b.key and a.value=b.value where a.key < 10 +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@table_desc1 +POSTHOOK: Input: default@table_desc2 +#### A masked pattern was here #### +22 diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out new file mode 100644 index 0000000..6add9f9 --- /dev/null +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out @@ -0,0 +1,162 @@ +PREHOOK: query: drop table table_desc1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table table_desc1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table table_desc2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table table_desc2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table table_desc1(key string, value string) clustered by (key, value) +sorted by (key DESC, value ASC) into 1 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_desc1 +POSTHOOK: query: create table table_desc1(key string, value string) clustered by (key, value) +sorted by (key DESC, value ASC) into 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_desc1 +PREHOOK: query: create table table_desc2(key string, value string) clustered by (key, value) +sorted by (key DESC, value ASC) into 1 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_desc2 +POSTHOOK: query: create table table_desc2(key string, value string) clustered by (key, value) +sorted by (key DESC, value ASC) into 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_desc2 +PREHOOK: query: insert overwrite table table_desc1 select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@table_desc1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table table_desc1 select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@table_desc1 +POSTHOOK: Lineage: table_desc1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: table_desc1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table table_desc2 select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@table_desc2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table table_desc2 select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@table_desc2 +POSTHOOK: Lineage: table_desc2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: table_desc2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- The columns of the tables above are sorted in same orders. +-- descending followed by ascending +-- So, sort merge join should be performed + +explain +select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b +on a.key=b.key and a.value=b.value where a.key < 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- The columns of the tables above are sorted in same orders. 
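Mixed directions are acceptable too, provided both tables mix them identically; the check is per-column agreement, not a single uniform direction:

    create table table_desc1 (key string, value string)
      clustered by (key, value) sorted by (key DESC, value ASC) into 1 buckets;
    create table table_desc2 (key string, value string)
      clustered by (key, value) sorted by (key DESC, value ASC) into 1 buckets;
    -- key: DESC = DESC, value: ASC = ASC, a per-column match, so the
    -- sort-merge conversion is still legal.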
+-- descending followed by ascending +-- So, sort merge join should be performed + +explain +select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b +on a.key=b.key and a.value=b.value where a.key < 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key is not null and value is not null) and (key < 10)) (type: boolean) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key is not null and value is not null) and (key < 10)) (type: boolean) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 2 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 2 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b +on a.key=b.key and a.value=b.value where a.key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@table_desc1 +PREHOOK: Input: default@table_desc2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b +on a.key=b.key and a.value=b.value where a.key < 10 +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@table_desc1 +POSTHOOK: Input: default@table_desc2 +#### A masked pattern was here #### +22 diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out new file mode 100644 index 0000000..b810a56 --- /dev/null +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out @@ -0,0 +1,160 @@ +PREHOOK: query: drop table table_desc1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table table_desc1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table table_desc2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table table_desc2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table table_desc1(key string, value string) clustered by (key, value) +sorted by (key DESC, value ASC) into 1 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_desc1 +POSTHOOK: query: create table table_desc1(key string, value string) clustered by (key, value) +sorted by (key DESC, value ASC) into 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_desc1 +PREHOOK: query: create table table_desc2(key string, value string) clustered by (key, value) +sorted by (key DESC, value DESC) into 1 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_desc2 +POSTHOOK: query: create table table_desc2(key string, value string) clustered by (key, value) +sorted by (key DESC, value DESC) into 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_desc2 +PREHOOK: query: insert overwrite table table_desc1 select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@table_desc1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table table_desc1 select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@table_desc1 +POSTHOOK: Lineage: table_desc1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: table_desc1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table table_desc2 select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@table_desc2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table table_desc2 select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@table_desc2 +POSTHOOK: Lineage: table_desc2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: table_desc2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- The columns of the tables above are sorted in different orders. +-- So, sort merge join should not be performed + +explain +select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b +on a.key=b.key and a.value=b.value where a.key < 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- The columns of the tables above are sorted in different orders. 
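This is the negative case: table_desc1 sorts value ASC while table_desc2 sorts value DESC, so the per-column direction check fails and the planner must fall back to an ordinary join. Note that in these Spark golden files the distinction is not visible in the plans themselves: the desc_1 through desc_3 plans above and the desc_4 plan below all compile to the same reduce-side common join (a Join Operator under Reducer 2).

    -- the only difference from the previous test is one sort direction:
    create table table_desc1 (key string, value string)
      clustered by (key, value) sorted by (key DESC, value ASC) into 1 buckets;
    create table table_desc2 (key string, value string)
      clustered by (key, value) sorted by (key DESC, value DESC) into 1 buckets;
    -- value ASC vs value DESC breaks the per-column match, so no SMB join here.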
+-- So, sort merge join should not be performed + +explain +select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b +on a.key=b.key and a.value=b.value where a.key < 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key is not null and value is not null) and (key < 10)) (type: boolean) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key is not null and value is not null) and (key < 10)) (type: boolean) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 2 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 2 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b +on a.key=b.key and a.value=b.value where a.key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@table_desc1 +PREHOOK: Input: default@table_desc2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b +on a.key=b.key and a.value=b.value where a.key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@table_desc1 +POSTHOOK: Input: default@table_desc2 +#### A masked pattern was here #### +22 diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out new file mode 100644 index 0000000..f59d942 --- /dev/null +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out @@ -0,0 +1,349 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) SORTED BY (key DESC) INTO 1 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) SORTED BY (key DESC) INTO 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) SORTED BY (key DESC) INTO 1 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) SORTED BY (key DESC) INTO 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) SORTED BY (value DESC) INTO 1 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 
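The ALTER TABLE above rewrites only the table-level CLUSTERED BY / SORTED BY metadata; the already-loaded partition part='1' keeps the (key DESC) sort spec it was written with, and it is the partition-level metadata that the sorted-merge check consults, which is why the join below is still expected to qualify. The divergence can be inspected directly (a usage sketch; standard DESCRIBE syntax):

    describe formatted srcbucket_mapjoin_part_2;
    -- table-level sort columns now reflect the new (value DESC) spec
    describe formatted srcbucket_mapjoin_part_2 partition (part='1');
    -- the existing partition still carries the original (key DESC) spec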
+POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) SORTED BY (value DESC) INTO 1 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: -- The partition sorting metadata matches but the table metadata does not, sorted merge join should still be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- The partition sorting metadata matches but the table metadata does not, sorted merge join should still be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + '1' + = + . + TOK_TABLE_OR_COL + b + part + '1' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 1 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 1 + numRows -1 + partition_columns part + partition_columns.types string + rawDataSize -1 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 1 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here 
#### + name default.srcbucket_mapjoin_part_2 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/part=1 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 1 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 1 + numRows -1 + partition_columns part + partition_columns.types string + rawDataSize -1 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 1 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/part=1 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + 
Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +1028 diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out new file mode 100644 index 0000000..4085d9a --- /dev/null +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out @@ -0,0 +1,348 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: 
default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) SORTED BY (value DESC) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key) SORTED BY (value DESC) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: -- The table sorting metadata matches but the partition metadata does not, sorted merge join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' +PREHOOK: type: QUERY +POSTHOOK: query: -- The table sorting metadata matches but the partition metadata does not, sorted merge join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + part + '1' + = + . 
+ TOK_TABLE_OR_COL + b + part + '1' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows -1 + partition_columns part + partition_columns.types string + rawDataSize -1 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/part=1 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 
+ input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows -1 + partition_columns part + partition_columns.types string + rawDataSize -1 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/part=1 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ 
count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part = '1' AND b.part = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +#### A masked pattern was here #### +1028 diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out new file mode 100644 index 0000000..28336c5 --- /dev/null +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out @@ -0,0 +1,485 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key, value) SORTED BY (key DESC) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key, value) SORTED BY (key DESC) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key, value) SORTED BY (value DESC) INTO 2 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Output: default@srcbucket_mapjoin_part_1 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key, value) SORTED BY (value DESC) INTO 2 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Output: default@srcbucket_mapjoin_part_1 +PREHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT 
OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2 +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_1 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key, value) SORTED BY (value DESC) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) +CLUSTERED BY (key, value) SORTED BY (value DESC) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key, value) SORTED BY (key DESC) INTO 2 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key, value) SORTED BY (key DESC) INTO 2 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='2') SELECT * FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='2') SELECT * FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=2 +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcbucket_mapjoin_part_2 PARTITION(part=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key, value) SORTED BY (value DESC) INTO 2 BUCKETS +PREHOOK: type: ALTERTABLE_CLUSTER_SORT +PREHOOK: Input: 
default@srcbucket_mapjoin_part_2 +PREHOOK: Output: default@srcbucket_mapjoin_part_2 +POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key, value) SORTED BY (value DESC) INTO 2 BUCKETS +POSTHOOK: type: ALTERTABLE_CLUSTER_SORT +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: -- The table sorting metadata matches but the partition metadata does not, sorted merge join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL +PREHOOK: type: QUERY +POSTHOOK: query: -- The table sorting metadata matches but the partition metadata does not, sorted merge join should not be used + +EXPLAIN EXTENDED +SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_1 + a + TOK_TABREF + TOK_TABNAME + srcbucket_mapjoin_part_2 + b + AND + AND + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + a + part + TOK_FUNCTION + TOK_ISNOTNULL + . + TOK_TABLE_OR_COL + b + part + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2906 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows -1 + partition_columns part + partition_columns.types string + rawDataSize -1 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + 
bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 +#### A masked pattern was here #### + Partition + base file name: part=2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 2 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows -1 + partition_columns part + partition_columns.types string + rawDataSize -1 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/part=1 [b] + /srcbucket_mapjoin_part_2/part=2 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2906 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: part=1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 1 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows -1 + partition_columns part + partition_columns.types string + rawDataSize -1 + serialization.ddl struct 
srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 +#### A masked pattern was here #### + Partition + base file name: part=2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + part 2 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + numFiles 2 + numRows -1 + partition_columns part + partition_columns.types string + rawDataSize -1 + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_1 + partition_columns part + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 1598 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1598 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: 
_col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part_1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) +FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b +ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part_1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1 +POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=2 +#### A masked pattern was here #### +4112 diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out new file mode 100644 index 0000000..087a89d --- /dev/null +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out @@ -0,0 +1,317 @@ +PREHOOK: query: drop table table_desc1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table table_desc1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table table_desc2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table table_desc2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table table_desc3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table table_desc3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table table_desc4 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table table_desc4 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table table_desc1(key string, value string) clustered by (key) +sorted by (key DESC) into 1 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_desc1 +POSTHOOK: query: create table table_desc1(key string, value string) clustered by (key) +sorted by (key DESC) into 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_desc1 +PREHOOK: query: create table table_desc2(key string, value string) 
clustered by (key) +sorted by (key DESC, value DESC) into 1 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_desc2 +POSTHOOK: query: create table table_desc2(key string, value string) clustered by (key) +sorted by (key DESC, value DESC) into 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_desc2 +PREHOOK: query: create table table_desc3(key string, value1 string, value2 string) clustered by (key) +sorted by (key DESC, value1 DESC,value2 DESC) into 1 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_desc3 +POSTHOOK: query: create table table_desc3(key string, value1 string, value2 string) clustered by (key) +sorted by (key DESC, value1 DESC,value2 DESC) into 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_desc3 +PREHOOK: query: create table table_desc4(key string, value2 string) clustered by (key) +sorted by (key DESC, value2 DESC) into 1 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_desc4 +POSTHOOK: query: create table table_desc4(key string, value2 string) clustered by (key) +sorted by (key DESC, value2 DESC) into 1 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_desc4 +PREHOOK: query: insert overwrite table table_desc1 select key, value from src sort by key DESC +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@table_desc1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table table_desc1 select key, value from src sort by key DESC +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@table_desc1 +POSTHOOK: Lineage: table_desc1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: table_desc1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table table_desc2 select key, value from src sort by key DESC +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@table_desc2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table table_desc2 select key, value from src sort by key DESC +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@table_desc2 +POSTHOOK: Lineage: table_desc2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: table_desc2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table table_desc3 select key, value, concat(value,"_2") as value2 from src sort by key, value, value2 DESC +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@table_desc3 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table table_desc3 select key, value, concat(value,"_2") as value2 from src sort by key, value, value2 DESC +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@src +POSTHOOK: Output: default@table_desc3 +POSTHOOK: Lineage: table_desc3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: table_desc3.value1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: table_desc3.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table table_desc4 select key, concat(value,"_2") as value2 from src sort by key, value2 DESC +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@table_desc4 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table table_desc4 select key, concat(value,"_2") as value2 from src sort by key, value2 DESC +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@table_desc4 +POSTHOOK: Lineage: table_desc4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: table_desc4.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- columns are sorted by one key in first table, two keys in second table but in same sort order for key. Hence SMB join should pass + +explain +select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b +on a.key=b.key where a.key < 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- columns are sorted by one key in first table, two keys in second table but in same sort order for key. Hence SMB join should pass + +explain +select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b +on a.key=b.key where a.key < 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key < 10)) (type: boolean) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key < 10)) (type: boolean) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 9 Data size: 991 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 9 Data size: 991 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b +on a.key=b.key where a.key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@table_desc1 +PREHOOK: Input: default@table_desc2 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b +on a.key=b.key where a.key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table_desc1 +POSTHOOK: Input: default@table_desc2 +#### A masked pattern was here #### +22 +PREHOOK: query: -- columns are sorted by 3 keys (a, b, c) in first table, two keys (a, c) in second table with same sort order. Hence SMB join should not pass + +explain +select /*+ mapjoin(b) */ count(*) from table_desc3 a join table_desc4 b +on a.key=b.key and a.value2=b.value2 where a.key < 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- columns are sorted by 3 keys (a, b, c) in first table, two keys (a, c) in second table with same sort order. 
Hence SMB join should not pass + +explain +select /*+ mapjoin(b) */ count(*) from table_desc3 a join table_desc4 b +on a.key=b.key and a.value2=b.value2 where a.key < 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 34 Data size: 6812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key is not null and value2 is not null) and (key < 10)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value2 (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value2 (type: string) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 53 Data size: 10718 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key is not null and value2 is not null) and (key < 10)) (type: boolean) + Statistics: Num rows: 4 Data size: 808 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value2 (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value2 (type: string) + Statistics: Num rows: 4 Data size: 808 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 4 Data size: 888 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 4 Data size: 888 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ mapjoin(b) */ count(*) from table_desc3 a join table_desc4 b +on a.key=b.key and a.value2=b.value2 where a.key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@table_desc3 +PREHOOK: Input: default@table_desc4 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ mapjoin(b) */ count(*) from table_desc3 a join table_desc4 b +on a.key=b.key and a.value2=b.value2 where a.key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@table_desc3 +POSTHOOK: Input: default@table_desc4 +#### A masked pattern was here #### +22 diff --git ql/src/test/results/clientpositive/spark/temp_table_join1.q.out ql/src/test/results/clientpositive/spark/temp_table_join1.q.out new file mode 100644 index 0000000..f06a9ab --- /dev/null +++ ql/src/test/results/clientpositive/spark/temp_table_join1.q.out @@ -0,0 +1,328 @@ +PREHOOK: query: CREATE TABLE src_nontemp AS SELECT * FROM src limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@src_nontemp +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: CREATE TABLE src_nontemp AS SELECT * FROM src limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_nontemp +PREHOOK: query: CREATE TEMPORARY TABLE src_temp AS SELECT * FROM src limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@src_temp +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: CREATE TEMPORARY TABLE src_temp AS SELECT * FROM src limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_temp +PREHOOK: query: -- Non temp table join +EXPLAIN +FROM src_nontemp src1 JOIN src_nontemp src2 ON (src1.key = src2.key) +SELECT src1.key, src2.value +PREHOOK: type: QUERY +POSTHOOK: query: -- Non temp table join +EXPLAIN +FROM src_nontemp src1 JOIN src_nontemp src2 ON (src1.key = src2.key) +SELECT src1.key, src2.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 0 Data size: 114 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: 
string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM src_nontemp src1 JOIN src_nontemp src2 ON (src1.key = src2.key) +SELECT src1.key, src2.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src_nontemp +#### A masked pattern was here #### +POSTHOOK: query: FROM src_nontemp src1 JOIN src_nontemp src2 ON (src1.key = src2.key) +SELECT src1.key, src2.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_nontemp +#### A masked pattern was here #### +165 val_165 +238 val_238 +255 val_255 +27 val_27 +278 val_278 +311 val_311 +409 val_409 +484 val_484 +86 val_86 +98 val_98 +PREHOOK: query: -- Non temp table join with temp table +EXPLAIN +FROM src_nontemp src1 JOIN src_temp src2 ON (src1.key = src2.key) +SELECT src1.key, src2.value +PREHOOK: type: QUERY +POSTHOOK: query: -- Non temp table join with temp table +EXPLAIN +FROM src_nontemp src1 JOIN src_temp src2 ON (src1.key = src2.key) +SELECT src1.key, src2.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 0 Data size: 114 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
+ Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM src_nontemp src1 JOIN src_temp src2 ON (src1.key = src2.key) +SELECT src1.key, src2.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src_nontemp +PREHOOK: Input: default@src_temp +#### A masked pattern was here #### +POSTHOOK: query: FROM src_nontemp src1 JOIN src_temp src2 ON (src1.key = src2.key) +SELECT src1.key, src2.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_nontemp +POSTHOOK: Input: default@src_temp +#### A masked pattern was here #### +165 val_165 +238 val_238 +255 val_255 +27 val_27 +278 val_278 +311 val_311 +409 val_409 +484 val_484 +86 val_86 +98 val_98 +PREHOOK: query: -- temp table join with temp table +EXPLAIN +FROM src_temp src1 JOIN src_temp src2 ON (src1.key = src2.key) +SELECT src1.key, src2.value +PREHOOK: type: QUERY +POSTHOOK: query: -- temp table join with temp table +EXPLAIN +FROM src_temp src1 JOIN src_temp src2 ON (src1.key = src2.key) +SELECT src1.key, src2.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 0 Data size: 114 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: FROM src_temp src1 JOIN src_temp src2 ON (src1.key = src2.key) +SELECT src1.key, src2.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src_temp +#### A masked pattern was here #### +POSTHOOK: query: FROM src_temp src1 JOIN src_temp src2 ON (src1.key = src2.key) +SELECT src1.key, src2.value +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@src_temp +#### A masked pattern was here #### +165 val_165 +238 val_238 +255 val_255 +27 val_27 +278 val_278 +311 val_311 +409 val_409 +484 val_484 +86 val_86 +98 val_98 +PREHOOK: query: DROP TABLE src_nontemp +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_nontemp +PREHOOK: Output: default@src_nontemp +POSTHOOK: query: DROP TABLE src_nontemp +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_nontemp +POSTHOOK: Output: default@src_nontemp +PREHOOK: query: DROP TABLE src_temp +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_temp +PREHOOK: Output: default@src_temp +POSTHOOK: query: DROP TABLE src_temp +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_temp +POSTHOOK: Output: default@src_temp diff --git ql/src/test/results/clientpositive/spark/tez_join_tests.q.out ql/src/test/results/clientpositive/spark/tez_join_tests.q.out new file mode 100644 index 0000000..9254944 --- /dev/null +++ ql/src/test/results/clientpositive/spark/tez_join_tests.q.out @@ -0,0 +1,2212 @@ +PREHOOK: query: explain +select * from (select b.key, b.value from src1 a left outer join src b on (a.key = b.key) order by b.key) x right outer join src c on (x.value = c.value) order by x.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from (select b.key, b.value from src1 a left outer join src b on (a.key = b.key) order by b.key) x right outer join src c on (x.value = c.value) order by x.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 4 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 4 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key 
expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} + 1 {VALUE._col0} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from (select b.key, b.value from src1 a left outer join src b on (a.key = b.key) order by b.key) x right outer join src c on (x.value = c.value) order by x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select * from (select b.key, b.value from src1 a left outer join src b on (a.key = b.key) order by b.key) x right outer join src c on (x.value = c.value) order by x.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +NULL NULL 0 val_0 +NULL NULL 0 val_0 +NULL NULL 0 val_0 +NULL NULL 10 val_10 +NULL NULL 100 val_100 +NULL NULL 100 val_100 +NULL NULL 103 val_103 +NULL NULL 103 val_103 +NULL NULL 104 val_104 +NULL NULL 104 val_104 +NULL NULL 105 val_105 +NULL NULL 11 val_11 +NULL NULL 111 val_111 +NULL NULL 113 val_113 +NULL NULL 113 val_113 +NULL NULL 114 val_114 +NULL NULL 116 val_116 +NULL NULL 118 val_118 +NULL NULL 118 val_118 +NULL NULL 119 val_119 +NULL NULL 119 val_119 +NULL NULL 119 val_119 +NULL NULL 12 val_12 +NULL NULL 12 val_12 +NULL NULL 120 val_120 +NULL NULL 120 val_120 +NULL NULL 125 val_125 +NULL NULL 125 val_125 +NULL NULL 126 val_126 +NULL NULL 129 val_129 +NULL NULL 129 
val_129 +NULL NULL 131 val_131 +NULL NULL 133 val_133 +NULL NULL 134 val_134 +NULL NULL 134 val_134 +NULL NULL 136 val_136 +NULL NULL 137 val_137 +NULL NULL 137 val_137 +NULL NULL 138 val_138 +NULL NULL 138 val_138 +NULL NULL 138 val_138 +NULL NULL 138 val_138 +NULL NULL 143 val_143 +NULL NULL 145 val_145 +NULL NULL 149 val_149 +NULL NULL 149 val_149 +NULL NULL 15 val_15 +NULL NULL 15 val_15 +NULL NULL 152 val_152 +NULL NULL 152 val_152 +NULL NULL 153 val_153 +NULL NULL 155 val_155 +NULL NULL 156 val_156 +NULL NULL 157 val_157 +NULL NULL 158 val_158 +NULL NULL 160 val_160 +NULL NULL 162 val_162 +NULL NULL 163 val_163 +NULL NULL 164 val_164 +NULL NULL 164 val_164 +NULL NULL 165 val_165 +NULL NULL 165 val_165 +NULL NULL 166 val_166 +NULL NULL 167 val_167 +NULL NULL 167 val_167 +NULL NULL 167 val_167 +NULL NULL 168 val_168 +NULL NULL 169 val_169 +NULL NULL 169 val_169 +NULL NULL 169 val_169 +NULL NULL 169 val_169 +NULL NULL 17 val_17 +NULL NULL 170 val_170 +NULL NULL 172 val_172 +NULL NULL 172 val_172 +NULL NULL 174 val_174 +NULL NULL 174 val_174 +NULL NULL 175 val_175 +NULL NULL 175 val_175 +NULL NULL 176 val_176 +NULL NULL 176 val_176 +NULL NULL 177 val_177 +NULL NULL 178 val_178 +NULL NULL 179 val_179 +NULL NULL 179 val_179 +NULL NULL 18 val_18 +NULL NULL 18 val_18 +NULL NULL 180 val_180 +NULL NULL 181 val_181 +NULL NULL 183 val_183 +NULL NULL 186 val_186 +NULL NULL 187 val_187 +NULL NULL 187 val_187 +NULL NULL 187 val_187 +NULL NULL 189 val_189 +NULL NULL 19 val_19 +NULL NULL 190 val_190 +NULL NULL 191 val_191 +NULL NULL 191 val_191 +NULL NULL 192 val_192 +NULL NULL 193 val_193 +NULL NULL 193 val_193 +NULL NULL 193 val_193 +NULL NULL 194 val_194 +NULL NULL 195 val_195 +NULL NULL 195 val_195 +NULL NULL 196 val_196 +NULL NULL 197 val_197 +NULL NULL 197 val_197 +NULL NULL 199 val_199 +NULL NULL 199 val_199 +NULL NULL 199 val_199 +NULL NULL 2 val_2 +NULL NULL 20 val_20 +NULL NULL 200 val_200 +NULL NULL 200 val_200 +NULL NULL 201 val_201 +NULL NULL 202 val_202 +NULL NULL 203 val_203 +NULL NULL 203 val_203 +NULL NULL 205 val_205 +NULL NULL 205 val_205 +NULL NULL 207 val_207 +NULL NULL 207 val_207 +NULL NULL 208 val_208 +NULL NULL 208 val_208 +NULL NULL 208 val_208 +NULL NULL 209 val_209 +NULL NULL 209 val_209 +NULL NULL 214 val_214 +NULL NULL 216 val_216 +NULL NULL 216 val_216 +NULL NULL 217 val_217 +NULL NULL 217 val_217 +NULL NULL 218 val_218 +NULL NULL 219 val_219 +NULL NULL 219 val_219 +NULL NULL 221 val_221 +NULL NULL 221 val_221 +NULL NULL 222 val_222 +NULL NULL 223 val_223 +NULL NULL 223 val_223 +NULL NULL 226 val_226 +NULL NULL 228 val_228 +NULL NULL 229 val_229 +NULL NULL 229 val_229 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 233 val_233 +NULL NULL 233 val_233 +NULL NULL 235 val_235 +NULL NULL 237 val_237 +NULL NULL 237 val_237 +NULL NULL 239 val_239 +NULL NULL 239 val_239 +NULL NULL 24 val_24 +NULL NULL 24 val_24 +NULL NULL 241 val_241 +NULL NULL 242 val_242 +NULL NULL 242 val_242 +NULL NULL 244 val_244 +NULL NULL 247 val_247 +NULL NULL 248 val_248 +NULL NULL 249 val_249 +NULL NULL 252 val_252 +NULL NULL 256 val_256 +NULL NULL 256 val_256 +NULL NULL 257 val_257 +NULL NULL 258 val_258 +NULL NULL 26 val_26 +NULL NULL 26 val_26 +NULL NULL 260 val_260 +NULL NULL 262 val_262 +NULL NULL 263 val_263 +NULL NULL 265 val_265 +NULL NULL 265 val_265 +NULL NULL 266 val_266 +NULL NULL 27 val_27 +NULL NULL 272 val_272 +NULL NULL 272 val_272 +NULL NULL 274 val_274 +NULL NULL 275 val_275 +NULL NULL 277 val_277 +NULL 
NULL 277 val_277 +NULL NULL 277 val_277 +NULL NULL 277 val_277 +NULL NULL 28 val_28 +NULL NULL 280 val_280 +NULL NULL 280 val_280 +NULL NULL 281 val_281 +NULL NULL 281 val_281 +NULL NULL 282 val_282 +NULL NULL 282 val_282 +NULL NULL 283 val_283 +NULL NULL 284 val_284 +NULL NULL 285 val_285 +NULL NULL 286 val_286 +NULL NULL 287 val_287 +NULL NULL 288 val_288 +NULL NULL 288 val_288 +NULL NULL 289 val_289 +NULL NULL 291 val_291 +NULL NULL 292 val_292 +NULL NULL 296 val_296 +NULL NULL 298 val_298 +NULL NULL 298 val_298 +NULL NULL 298 val_298 +NULL NULL 30 val_30 +NULL NULL 302 val_302 +NULL NULL 305 val_305 +NULL NULL 306 val_306 +NULL NULL 307 val_307 +NULL NULL 307 val_307 +NULL NULL 308 val_308 +NULL NULL 309 val_309 +NULL NULL 309 val_309 +NULL NULL 310 val_310 +NULL NULL 315 val_315 +NULL NULL 316 val_316 +NULL NULL 316 val_316 +NULL NULL 316 val_316 +NULL NULL 317 val_317 +NULL NULL 317 val_317 +NULL NULL 318 val_318 +NULL NULL 318 val_318 +NULL NULL 318 val_318 +NULL NULL 321 val_321 +NULL NULL 321 val_321 +NULL NULL 322 val_322 +NULL NULL 322 val_322 +NULL NULL 323 val_323 +NULL NULL 325 val_325 +NULL NULL 325 val_325 +NULL NULL 327 val_327 +NULL NULL 327 val_327 +NULL NULL 327 val_327 +NULL NULL 33 val_33 +NULL NULL 331 val_331 +NULL NULL 331 val_331 +NULL NULL 332 val_332 +NULL NULL 333 val_333 +NULL NULL 333 val_333 +NULL NULL 335 val_335 +NULL NULL 336 val_336 +NULL NULL 338 val_338 +NULL NULL 339 val_339 +NULL NULL 34 val_34 +NULL NULL 341 val_341 +NULL NULL 342 val_342 +NULL NULL 342 val_342 +NULL NULL 344 val_344 +NULL NULL 344 val_344 +NULL NULL 345 val_345 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 35 val_35 +NULL NULL 35 val_35 +NULL NULL 35 val_35 +NULL NULL 351 val_351 +NULL NULL 353 val_353 +NULL NULL 353 val_353 +NULL NULL 356 val_356 +NULL NULL 360 val_360 +NULL NULL 362 val_362 +NULL NULL 364 val_364 +NULL NULL 365 val_365 +NULL NULL 366 val_366 +NULL NULL 367 val_367 +NULL NULL 367 val_367 +NULL NULL 368 val_368 +NULL NULL 37 val_37 +NULL NULL 37 val_37 +NULL NULL 373 val_373 +NULL NULL 374 val_374 +NULL NULL 375 val_375 +NULL NULL 377 val_377 +NULL NULL 378 val_378 +NULL NULL 379 val_379 +NULL NULL 382 val_382 +NULL NULL 382 val_382 +NULL NULL 384 val_384 +NULL NULL 384 val_384 +NULL NULL 384 val_384 +NULL NULL 386 val_386 +NULL NULL 389 val_389 +NULL NULL 392 val_392 +NULL NULL 393 val_393 +NULL NULL 394 val_394 +NULL NULL 395 val_395 +NULL NULL 395 val_395 +NULL NULL 396 val_396 +NULL NULL 396 val_396 +NULL NULL 396 val_396 +NULL NULL 397 val_397 +NULL NULL 397 val_397 +NULL NULL 399 val_399 +NULL NULL 399 val_399 +NULL NULL 4 val_4 +NULL NULL 400 val_400 +NULL NULL 402 val_402 +NULL NULL 403 val_403 +NULL NULL 403 val_403 +NULL NULL 403 val_403 +NULL NULL 404 val_404 +NULL NULL 404 val_404 +NULL NULL 407 val_407 +NULL NULL 409 val_409 +NULL NULL 409 val_409 +NULL NULL 409 val_409 +NULL NULL 41 val_41 +NULL NULL 411 val_411 +NULL NULL 413 val_413 +NULL NULL 413 val_413 +NULL NULL 414 val_414 +NULL NULL 414 val_414 +NULL NULL 417 val_417 +NULL NULL 417 val_417 +NULL NULL 417 val_417 +NULL NULL 418 val_418 +NULL NULL 419 val_419 +NULL NULL 42 val_42 +NULL NULL 42 val_42 +NULL NULL 421 val_421 +NULL NULL 424 val_424 +NULL NULL 424 val_424 +NULL NULL 427 val_427 +NULL NULL 429 val_429 +NULL NULL 429 val_429 +NULL NULL 43 val_43 +NULL NULL 430 val_430 +NULL NULL 430 val_430 +NULL NULL 430 val_430 +NULL NULL 431 val_431 +NULL NULL 431 val_431 +NULL NULL 431 val_431 +NULL NULL 432 val_432 
+NULL NULL 435 val_435 +NULL NULL 436 val_436 +NULL NULL 437 val_437 +NULL NULL 438 val_438 +NULL NULL 438 val_438 +NULL NULL 438 val_438 +NULL NULL 439 val_439 +NULL NULL 439 val_439 +NULL NULL 44 val_44 +NULL NULL 443 val_443 +NULL NULL 444 val_444 +NULL NULL 446 val_446 +NULL NULL 448 val_448 +NULL NULL 449 val_449 +NULL NULL 452 val_452 +NULL NULL 453 val_453 +NULL NULL 454 val_454 +NULL NULL 454 val_454 +NULL NULL 454 val_454 +NULL NULL 455 val_455 +NULL NULL 457 val_457 +NULL NULL 458 val_458 +NULL NULL 458 val_458 +NULL NULL 459 val_459 +NULL NULL 459 val_459 +NULL NULL 460 val_460 +NULL NULL 462 val_462 +NULL NULL 462 val_462 +NULL NULL 463 val_463 +NULL NULL 463 val_463 +NULL NULL 466 val_466 +NULL NULL 466 val_466 +NULL NULL 466 val_466 +NULL NULL 467 val_467 +NULL NULL 468 val_468 +NULL NULL 468 val_468 +NULL NULL 468 val_468 +NULL NULL 468 val_468 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 47 val_47 +NULL NULL 470 val_470 +NULL NULL 472 val_472 +NULL NULL 475 val_475 +NULL NULL 477 val_477 +NULL NULL 478 val_478 +NULL NULL 478 val_478 +NULL NULL 479 val_479 +NULL NULL 480 val_480 +NULL NULL 480 val_480 +NULL NULL 480 val_480 +NULL NULL 481 val_481 +NULL NULL 482 val_482 +NULL NULL 483 val_483 +NULL NULL 484 val_484 +NULL NULL 485 val_485 +NULL NULL 487 val_487 +NULL NULL 489 val_489 +NULL NULL 489 val_489 +NULL NULL 489 val_489 +NULL NULL 489 val_489 +NULL NULL 490 val_490 +NULL NULL 491 val_491 +NULL NULL 492 val_492 +NULL NULL 492 val_492 +NULL NULL 493 val_493 +NULL NULL 494 val_494 +NULL NULL 495 val_495 +NULL NULL 496 val_496 +NULL NULL 497 val_497 +NULL NULL 498 val_498 +NULL NULL 498 val_498 +NULL NULL 498 val_498 +NULL NULL 5 val_5 +NULL NULL 5 val_5 +NULL NULL 5 val_5 +NULL NULL 51 val_51 +NULL NULL 51 val_51 +NULL NULL 53 val_53 +NULL NULL 54 val_54 +NULL NULL 57 val_57 +NULL NULL 58 val_58 +NULL NULL 58 val_58 +NULL NULL 64 val_64 +NULL NULL 65 val_65 +NULL NULL 67 val_67 +NULL NULL 67 val_67 +NULL NULL 69 val_69 +NULL NULL 70 val_70 +NULL NULL 70 val_70 +NULL NULL 70 val_70 +NULL NULL 72 val_72 +NULL NULL 72 val_72 +NULL NULL 74 val_74 +NULL NULL 76 val_76 +NULL NULL 76 val_76 +NULL NULL 77 val_77 +NULL NULL 78 val_78 +NULL NULL 8 val_8 +NULL NULL 80 val_80 +NULL NULL 82 val_82 +NULL NULL 83 val_83 +NULL NULL 83 val_83 +NULL NULL 84 val_84 +NULL NULL 84 val_84 +NULL NULL 85 val_85 +NULL NULL 86 val_86 +NULL NULL 87 val_87 +NULL NULL 9 val_9 +NULL NULL 90 val_90 +NULL NULL 90 val_90 +NULL NULL 90 val_90 +NULL NULL 92 val_92 +NULL NULL 95 val_95 +NULL NULL 95 val_95 +NULL NULL 96 val_96 +NULL NULL 97 val_97 +NULL NULL 97 val_97 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +146 val_146 146 val_146 +146 val_146 146 val_146 +146 val_146 146 val_146 +146 val_146 146 val_146 +150 val_150 150 val_150 +213 val_213 213 val_213 +213 val_213 213 val_213 +213 val_213 213 val_213 +213 val_213 213 val_213 +224 val_224 224 val_224 +224 val_224 224 val_224 +224 val_224 224 val_224 +224 val_224 224 val_224 +238 val_238 238 val_238 +238 val_238 238 val_238 +238 val_238 238 val_238 +238 val_238 238 val_238 +255 val_255 255 val_255 +255 val_255 255 val_255 +255 val_255 255 val_255 +255 val_255 255 val_255 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 
+273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +278 val_278 278 val_278 +278 val_278 278 val_278 +278 val_278 278 val_278 +278 val_278 278 val_278 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +66 val_66 66 val_66 +98 val_98 98 val_98 +98 val_98 98 val_98 +98 val_98 98 val_98 +98 val_98 98 val_98 +PREHOOK: query: select * from (select b.key, b.value from src1 a left outer join src b on (a.key = b.key)) x right outer join src c on (x.value = c.value) order by x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select * from (select b.key, b.value from src1 a left outer join src b on (a.key = b.key)) x right outer join src c on (x.value = c.value) order by x.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +NULL NULL 0 val_0 +NULL NULL 0 val_0 +NULL NULL 0 val_0 +NULL NULL 10 val_10 +NULL NULL 100 val_100 +NULL NULL 100 val_100 +NULL NULL 103 val_103 +NULL NULL 103 val_103 +NULL NULL 104 val_104 +NULL NULL 104 val_104 +NULL NULL 105 val_105 +NULL NULL 11 val_11 +NULL NULL 111 val_111 +NULL NULL 113 val_113 +NULL NULL 113 val_113 +NULL NULL 114 val_114 +NULL NULL 116 val_116 +NULL NULL 118 val_118 +NULL NULL 118 val_118 +NULL NULL 119 val_119 +NULL NULL 119 val_119 +NULL NULL 119 val_119 +NULL NULL 12 val_12 +NULL NULL 12 val_12 +NULL NULL 120 val_120 +NULL NULL 120 val_120 +NULL NULL 125 val_125 +NULL NULL 125 val_125 +NULL NULL 126 val_126 +NULL NULL 129 val_129 +NULL NULL 129 val_129 +NULL NULL 131 val_131 +NULL NULL 133 val_133 +NULL NULL 134 val_134 +NULL NULL 134 val_134 +NULL NULL 136 val_136 +NULL NULL 137 val_137 +NULL NULL 137 val_137 +NULL NULL 138 val_138 +NULL NULL 138 val_138 +NULL NULL 138 val_138 +NULL NULL 138 val_138 +NULL NULL 143 val_143 +NULL NULL 145 val_145 +NULL NULL 149 val_149 +NULL NULL 149 val_149 +NULL NULL 15 val_15 +NULL NULL 15 val_15 +NULL NULL 152 val_152 +NULL NULL 152 val_152 +NULL NULL 153 val_153 +NULL NULL 155 val_155 +NULL NULL 156 
val_156 +NULL NULL 157 val_157 +NULL NULL 158 val_158 +NULL NULL 160 val_160 +NULL NULL 162 val_162 +NULL NULL 163 val_163 +NULL NULL 164 val_164 +NULL NULL 164 val_164 +NULL NULL 165 val_165 +NULL NULL 165 val_165 +NULL NULL 166 val_166 +NULL NULL 167 val_167 +NULL NULL 167 val_167 +NULL NULL 167 val_167 +NULL NULL 168 val_168 +NULL NULL 169 val_169 +NULL NULL 169 val_169 +NULL NULL 169 val_169 +NULL NULL 169 val_169 +NULL NULL 17 val_17 +NULL NULL 170 val_170 +NULL NULL 172 val_172 +NULL NULL 172 val_172 +NULL NULL 174 val_174 +NULL NULL 174 val_174 +NULL NULL 175 val_175 +NULL NULL 175 val_175 +NULL NULL 176 val_176 +NULL NULL 176 val_176 +NULL NULL 177 val_177 +NULL NULL 178 val_178 +NULL NULL 179 val_179 +NULL NULL 179 val_179 +NULL NULL 18 val_18 +NULL NULL 18 val_18 +NULL NULL 180 val_180 +NULL NULL 181 val_181 +NULL NULL 183 val_183 +NULL NULL 186 val_186 +NULL NULL 187 val_187 +NULL NULL 187 val_187 +NULL NULL 187 val_187 +NULL NULL 189 val_189 +NULL NULL 19 val_19 +NULL NULL 190 val_190 +NULL NULL 191 val_191 +NULL NULL 191 val_191 +NULL NULL 192 val_192 +NULL NULL 193 val_193 +NULL NULL 193 val_193 +NULL NULL 193 val_193 +NULL NULL 194 val_194 +NULL NULL 195 val_195 +NULL NULL 195 val_195 +NULL NULL 196 val_196 +NULL NULL 197 val_197 +NULL NULL 197 val_197 +NULL NULL 199 val_199 +NULL NULL 199 val_199 +NULL NULL 199 val_199 +NULL NULL 2 val_2 +NULL NULL 20 val_20 +NULL NULL 200 val_200 +NULL NULL 200 val_200 +NULL NULL 201 val_201 +NULL NULL 202 val_202 +NULL NULL 203 val_203 +NULL NULL 203 val_203 +NULL NULL 205 val_205 +NULL NULL 205 val_205 +NULL NULL 207 val_207 +NULL NULL 207 val_207 +NULL NULL 208 val_208 +NULL NULL 208 val_208 +NULL NULL 208 val_208 +NULL NULL 209 val_209 +NULL NULL 209 val_209 +NULL NULL 214 val_214 +NULL NULL 216 val_216 +NULL NULL 216 val_216 +NULL NULL 217 val_217 +NULL NULL 217 val_217 +NULL NULL 218 val_218 +NULL NULL 219 val_219 +NULL NULL 219 val_219 +NULL NULL 221 val_221 +NULL NULL 221 val_221 +NULL NULL 222 val_222 +NULL NULL 223 val_223 +NULL NULL 223 val_223 +NULL NULL 226 val_226 +NULL NULL 228 val_228 +NULL NULL 229 val_229 +NULL NULL 229 val_229 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 233 val_233 +NULL NULL 233 val_233 +NULL NULL 235 val_235 +NULL NULL 237 val_237 +NULL NULL 237 val_237 +NULL NULL 239 val_239 +NULL NULL 239 val_239 +NULL NULL 24 val_24 +NULL NULL 24 val_24 +NULL NULL 241 val_241 +NULL NULL 242 val_242 +NULL NULL 242 val_242 +NULL NULL 244 val_244 +NULL NULL 247 val_247 +NULL NULL 248 val_248 +NULL NULL 249 val_249 +NULL NULL 252 val_252 +NULL NULL 256 val_256 +NULL NULL 256 val_256 +NULL NULL 257 val_257 +NULL NULL 258 val_258 +NULL NULL 26 val_26 +NULL NULL 26 val_26 +NULL NULL 260 val_260 +NULL NULL 262 val_262 +NULL NULL 263 val_263 +NULL NULL 265 val_265 +NULL NULL 265 val_265 +NULL NULL 266 val_266 +NULL NULL 27 val_27 +NULL NULL 272 val_272 +NULL NULL 272 val_272 +NULL NULL 274 val_274 +NULL NULL 275 val_275 +NULL NULL 277 val_277 +NULL NULL 277 val_277 +NULL NULL 277 val_277 +NULL NULL 277 val_277 +NULL NULL 28 val_28 +NULL NULL 280 val_280 +NULL NULL 280 val_280 +NULL NULL 281 val_281 +NULL NULL 281 val_281 +NULL NULL 282 val_282 +NULL NULL 282 val_282 +NULL NULL 283 val_283 +NULL NULL 284 val_284 +NULL NULL 285 val_285 +NULL NULL 286 val_286 +NULL NULL 287 val_287 +NULL NULL 288 val_288 +NULL NULL 288 val_288 +NULL NULL 289 val_289 +NULL NULL 291 val_291 +NULL NULL 292 val_292 +NULL NULL 296 val_296 +NULL NULL 298 val_298 +NULL 
NULL 298 val_298 +NULL NULL 298 val_298 +NULL NULL 30 val_30 +NULL NULL 302 val_302 +NULL NULL 305 val_305 +NULL NULL 306 val_306 +NULL NULL 307 val_307 +NULL NULL 307 val_307 +NULL NULL 308 val_308 +NULL NULL 309 val_309 +NULL NULL 309 val_309 +NULL NULL 310 val_310 +NULL NULL 315 val_315 +NULL NULL 316 val_316 +NULL NULL 316 val_316 +NULL NULL 316 val_316 +NULL NULL 317 val_317 +NULL NULL 317 val_317 +NULL NULL 318 val_318 +NULL NULL 318 val_318 +NULL NULL 318 val_318 +NULL NULL 321 val_321 +NULL NULL 321 val_321 +NULL NULL 322 val_322 +NULL NULL 322 val_322 +NULL NULL 323 val_323 +NULL NULL 325 val_325 +NULL NULL 325 val_325 +NULL NULL 327 val_327 +NULL NULL 327 val_327 +NULL NULL 327 val_327 +NULL NULL 33 val_33 +NULL NULL 331 val_331 +NULL NULL 331 val_331 +NULL NULL 332 val_332 +NULL NULL 333 val_333 +NULL NULL 333 val_333 +NULL NULL 335 val_335 +NULL NULL 336 val_336 +NULL NULL 338 val_338 +NULL NULL 339 val_339 +NULL NULL 34 val_34 +NULL NULL 341 val_341 +NULL NULL 342 val_342 +NULL NULL 342 val_342 +NULL NULL 344 val_344 +NULL NULL 344 val_344 +NULL NULL 345 val_345 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 35 val_35 +NULL NULL 35 val_35 +NULL NULL 35 val_35 +NULL NULL 351 val_351 +NULL NULL 353 val_353 +NULL NULL 353 val_353 +NULL NULL 356 val_356 +NULL NULL 360 val_360 +NULL NULL 362 val_362 +NULL NULL 364 val_364 +NULL NULL 365 val_365 +NULL NULL 366 val_366 +NULL NULL 367 val_367 +NULL NULL 367 val_367 +NULL NULL 368 val_368 +NULL NULL 37 val_37 +NULL NULL 37 val_37 +NULL NULL 373 val_373 +NULL NULL 374 val_374 +NULL NULL 375 val_375 +NULL NULL 377 val_377 +NULL NULL 378 val_378 +NULL NULL 379 val_379 +NULL NULL 382 val_382 +NULL NULL 382 val_382 +NULL NULL 384 val_384 +NULL NULL 384 val_384 +NULL NULL 384 val_384 +NULL NULL 386 val_386 +NULL NULL 389 val_389 +NULL NULL 392 val_392 +NULL NULL 393 val_393 +NULL NULL 394 val_394 +NULL NULL 395 val_395 +NULL NULL 395 val_395 +NULL NULL 396 val_396 +NULL NULL 396 val_396 +NULL NULL 396 val_396 +NULL NULL 397 val_397 +NULL NULL 397 val_397 +NULL NULL 399 val_399 +NULL NULL 399 val_399 +NULL NULL 4 val_4 +NULL NULL 400 val_400 +NULL NULL 402 val_402 +NULL NULL 403 val_403 +NULL NULL 403 val_403 +NULL NULL 403 val_403 +NULL NULL 404 val_404 +NULL NULL 404 val_404 +NULL NULL 407 val_407 +NULL NULL 409 val_409 +NULL NULL 409 val_409 +NULL NULL 409 val_409 +NULL NULL 41 val_41 +NULL NULL 411 val_411 +NULL NULL 413 val_413 +NULL NULL 413 val_413 +NULL NULL 414 val_414 +NULL NULL 414 val_414 +NULL NULL 417 val_417 +NULL NULL 417 val_417 +NULL NULL 417 val_417 +NULL NULL 418 val_418 +NULL NULL 419 val_419 +NULL NULL 42 val_42 +NULL NULL 42 val_42 +NULL NULL 421 val_421 +NULL NULL 424 val_424 +NULL NULL 424 val_424 +NULL NULL 427 val_427 +NULL NULL 429 val_429 +NULL NULL 429 val_429 +NULL NULL 43 val_43 +NULL NULL 430 val_430 +NULL NULL 430 val_430 +NULL NULL 430 val_430 +NULL NULL 431 val_431 +NULL NULL 431 val_431 +NULL NULL 431 val_431 +NULL NULL 432 val_432 +NULL NULL 435 val_435 +NULL NULL 436 val_436 +NULL NULL 437 val_437 +NULL NULL 438 val_438 +NULL NULL 438 val_438 +NULL NULL 438 val_438 +NULL NULL 439 val_439 +NULL NULL 439 val_439 +NULL NULL 44 val_44 +NULL NULL 443 val_443 +NULL NULL 444 val_444 +NULL NULL 446 val_446 +NULL NULL 448 val_448 +NULL NULL 449 val_449 +NULL NULL 452 val_452 +NULL NULL 453 val_453 +NULL NULL 454 val_454 +NULL NULL 454 val_454 +NULL NULL 454 val_454 +NULL NULL 455 val_455 +NULL NULL 457 val_457 +NULL NULL 458 val_458 
+NULL NULL 458 val_458 +NULL NULL 459 val_459 +NULL NULL 459 val_459 +NULL NULL 460 val_460 +NULL NULL 462 val_462 +NULL NULL 462 val_462 +NULL NULL 463 val_463 +NULL NULL 463 val_463 +NULL NULL 466 val_466 +NULL NULL 466 val_466 +NULL NULL 466 val_466 +NULL NULL 467 val_467 +NULL NULL 468 val_468 +NULL NULL 468 val_468 +NULL NULL 468 val_468 +NULL NULL 468 val_468 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 47 val_47 +NULL NULL 470 val_470 +NULL NULL 472 val_472 +NULL NULL 475 val_475 +NULL NULL 477 val_477 +NULL NULL 478 val_478 +NULL NULL 478 val_478 +NULL NULL 479 val_479 +NULL NULL 480 val_480 +NULL NULL 480 val_480 +NULL NULL 480 val_480 +NULL NULL 481 val_481 +NULL NULL 482 val_482 +NULL NULL 483 val_483 +NULL NULL 484 val_484 +NULL NULL 485 val_485 +NULL NULL 487 val_487 +NULL NULL 489 val_489 +NULL NULL 489 val_489 +NULL NULL 489 val_489 +NULL NULL 489 val_489 +NULL NULL 490 val_490 +NULL NULL 491 val_491 +NULL NULL 492 val_492 +NULL NULL 492 val_492 +NULL NULL 493 val_493 +NULL NULL 494 val_494 +NULL NULL 495 val_495 +NULL NULL 496 val_496 +NULL NULL 497 val_497 +NULL NULL 498 val_498 +NULL NULL 498 val_498 +NULL NULL 498 val_498 +NULL NULL 5 val_5 +NULL NULL 5 val_5 +NULL NULL 5 val_5 +NULL NULL 51 val_51 +NULL NULL 51 val_51 +NULL NULL 53 val_53 +NULL NULL 54 val_54 +NULL NULL 57 val_57 +NULL NULL 58 val_58 +NULL NULL 58 val_58 +NULL NULL 64 val_64 +NULL NULL 65 val_65 +NULL NULL 67 val_67 +NULL NULL 67 val_67 +NULL NULL 69 val_69 +NULL NULL 70 val_70 +NULL NULL 70 val_70 +NULL NULL 70 val_70 +NULL NULL 72 val_72 +NULL NULL 72 val_72 +NULL NULL 74 val_74 +NULL NULL 76 val_76 +NULL NULL 76 val_76 +NULL NULL 77 val_77 +NULL NULL 78 val_78 +NULL NULL 8 val_8 +NULL NULL 80 val_80 +NULL NULL 82 val_82 +NULL NULL 83 val_83 +NULL NULL 83 val_83 +NULL NULL 84 val_84 +NULL NULL 84 val_84 +NULL NULL 85 val_85 +NULL NULL 86 val_86 +NULL NULL 87 val_87 +NULL NULL 9 val_9 +NULL NULL 90 val_90 +NULL NULL 90 val_90 +NULL NULL 90 val_90 +NULL NULL 92 val_92 +NULL NULL 95 val_95 +NULL NULL 95 val_95 +NULL NULL 96 val_96 +NULL NULL 97 val_97 +NULL NULL 97 val_97 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +146 val_146 146 val_146 +146 val_146 146 val_146 +146 val_146 146 val_146 +146 val_146 146 val_146 +150 val_150 150 val_150 +213 val_213 213 val_213 +213 val_213 213 val_213 +213 val_213 213 val_213 +213 val_213 213 val_213 +224 val_224 224 val_224 +224 val_224 224 val_224 +224 val_224 224 val_224 +224 val_224 224 val_224 +238 val_238 238 val_238 +238 val_238 238 val_238 +238 val_238 238 val_238 +238 val_238 238 val_238 +255 val_255 255 val_255 +255 val_255 255 val_255 +255 val_255 255 val_255 +255 val_255 255 val_255 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +278 val_278 278 val_278 +278 val_278 278 val_278 +278 val_278 278 val_278 +278 val_278 278 val_278 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 
val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +66 val_66 66 val_66 +98 val_98 98 val_98 +98 val_98 98 val_98 +98 val_98 98 val_98 +98 val_98 98 val_98 +PREHOOK: query: select * from src1 a left outer join src b on (a.key = b.key) right outer join src c on (a.value = c.value) order by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select * from src1 a left outer join src b on (a.key = b.key) right outer join src c on (a.value = c.value) order by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +NULL NULL NULL NULL 0 val_0 +NULL NULL NULL NULL 0 val_0 +NULL NULL NULL NULL 0 val_0 +NULL NULL NULL NULL 10 val_10 +NULL NULL NULL NULL 100 val_100 +NULL NULL NULL NULL 100 val_100 +NULL NULL NULL NULL 103 val_103 +NULL NULL NULL NULL 103 val_103 +NULL NULL NULL NULL 104 val_104 +NULL NULL NULL NULL 104 val_104 +NULL NULL NULL NULL 105 val_105 +NULL NULL NULL NULL 11 val_11 +NULL NULL NULL NULL 111 val_111 +NULL NULL NULL NULL 113 val_113 +NULL NULL NULL NULL 113 val_113 +NULL NULL NULL NULL 114 val_114 +NULL NULL NULL NULL 116 val_116 +NULL NULL NULL NULL 118 val_118 +NULL NULL NULL NULL 118 val_118 +NULL NULL NULL NULL 119 val_119 +NULL NULL NULL NULL 119 val_119 +NULL NULL NULL NULL 119 val_119 +NULL NULL NULL NULL 12 val_12 +NULL NULL NULL NULL 12 val_12 +NULL NULL NULL NULL 120 val_120 +NULL NULL NULL NULL 120 val_120 +NULL NULL NULL NULL 125 val_125 +NULL NULL NULL NULL 125 val_125 +NULL NULL NULL NULL 126 val_126 +NULL NULL NULL NULL 128 val_128 +NULL NULL NULL NULL 128 val_128 +NULL NULL NULL NULL 128 val_128 +NULL NULL NULL NULL 129 val_129 +NULL NULL NULL NULL 129 val_129 +NULL NULL NULL NULL 131 val_131 +NULL NULL NULL NULL 133 val_133 +NULL NULL NULL NULL 134 val_134 +NULL NULL NULL NULL 134 val_134 +NULL NULL NULL NULL 136 val_136 +NULL NULL NULL NULL 137 val_137 +NULL NULL NULL NULL 137 val_137 +NULL NULL NULL NULL 138 val_138 +NULL NULL NULL NULL 138 val_138 +NULL NULL NULL NULL 138 val_138 +NULL NULL NULL NULL 138 val_138 +NULL NULL NULL NULL 143 val_143 +NULL NULL NULL NULL 145 val_145 +NULL NULL NULL NULL 149 val_149 +NULL NULL NULL NULL 149 val_149 +NULL NULL NULL NULL 15 val_15 +NULL NULL NULL NULL 15 val_15 +NULL NULL NULL NULL 152 val_152 +NULL NULL NULL NULL 152 val_152 +NULL NULL NULL NULL 153 val_153 
+NULL NULL NULL NULL 155 val_155 +NULL NULL NULL NULL 156 val_156 +NULL NULL NULL NULL 157 val_157 +NULL NULL NULL NULL 158 val_158 +NULL NULL NULL NULL 160 val_160 +NULL NULL NULL NULL 162 val_162 +NULL NULL NULL NULL 163 val_163 +NULL NULL NULL NULL 164 val_164 +NULL NULL NULL NULL 164 val_164 +NULL NULL NULL NULL 166 val_166 +NULL NULL NULL NULL 167 val_167 +NULL NULL NULL NULL 167 val_167 +NULL NULL NULL NULL 167 val_167 +NULL NULL NULL NULL 168 val_168 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 169 val_169 +NULL NULL NULL NULL 17 val_17 +NULL NULL NULL NULL 170 val_170 +NULL NULL NULL NULL 172 val_172 +NULL NULL NULL NULL 172 val_172 +NULL NULL NULL NULL 174 val_174 +NULL NULL NULL NULL 174 val_174 +NULL NULL NULL NULL 175 val_175 +NULL NULL NULL NULL 175 val_175 +NULL NULL NULL NULL 176 val_176 +NULL NULL NULL NULL 176 val_176 +NULL NULL NULL NULL 177 val_177 +NULL NULL NULL NULL 178 val_178 +NULL NULL NULL NULL 179 val_179 +NULL NULL NULL NULL 179 val_179 +NULL NULL NULL NULL 18 val_18 +NULL NULL NULL NULL 18 val_18 +NULL NULL NULL NULL 180 val_180 +NULL NULL NULL NULL 181 val_181 +NULL NULL NULL NULL 183 val_183 +NULL NULL NULL NULL 186 val_186 +NULL NULL NULL NULL 187 val_187 +NULL NULL NULL NULL 187 val_187 +NULL NULL NULL NULL 187 val_187 +NULL NULL NULL NULL 189 val_189 +NULL NULL NULL NULL 19 val_19 +NULL NULL NULL NULL 190 val_190 +NULL NULL NULL NULL 191 val_191 +NULL NULL NULL NULL 191 val_191 +NULL NULL NULL NULL 192 val_192 +NULL NULL NULL NULL 194 val_194 +NULL NULL NULL NULL 195 val_195 +NULL NULL NULL NULL 195 val_195 +NULL NULL NULL NULL 196 val_196 +NULL NULL NULL NULL 197 val_197 +NULL NULL NULL NULL 197 val_197 +NULL NULL NULL NULL 199 val_199 +NULL NULL NULL NULL 199 val_199 +NULL NULL NULL NULL 199 val_199 +NULL NULL NULL NULL 2 val_2 +NULL NULL NULL NULL 20 val_20 +NULL NULL NULL NULL 200 val_200 +NULL NULL NULL NULL 200 val_200 +NULL NULL NULL NULL 201 val_201 +NULL NULL NULL NULL 202 val_202 +NULL NULL NULL NULL 203 val_203 +NULL NULL NULL NULL 203 val_203 +NULL NULL NULL NULL 205 val_205 +NULL NULL NULL NULL 205 val_205 +NULL NULL NULL NULL 207 val_207 +NULL NULL NULL NULL 207 val_207 +NULL NULL NULL NULL 208 val_208 +NULL NULL NULL NULL 208 val_208 +NULL NULL NULL NULL 208 val_208 +NULL NULL NULL NULL 209 val_209 +NULL NULL NULL NULL 209 val_209 +NULL NULL NULL NULL 214 val_214 +NULL NULL NULL NULL 216 val_216 +NULL NULL NULL NULL 216 val_216 +NULL NULL NULL NULL 217 val_217 +NULL NULL NULL NULL 217 val_217 +NULL NULL NULL NULL 218 val_218 +NULL NULL NULL NULL 219 val_219 +NULL NULL NULL NULL 219 val_219 +NULL NULL NULL NULL 221 val_221 +NULL NULL NULL NULL 221 val_221 +NULL NULL NULL NULL 222 val_222 +NULL NULL NULL NULL 223 val_223 +NULL NULL NULL NULL 223 val_223 +NULL NULL NULL NULL 224 val_224 +NULL NULL NULL NULL 224 val_224 +NULL NULL NULL NULL 226 val_226 +NULL NULL NULL NULL 228 val_228 +NULL NULL NULL NULL 229 val_229 +NULL NULL NULL NULL 229 val_229 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 230 val_230 +NULL NULL NULL NULL 233 val_233 +NULL NULL NULL NULL 233 val_233 +NULL NULL NULL NULL 235 val_235 +NULL NULL NULL NULL 237 val_237 +NULL NULL NULL NULL 237 val_237 +NULL NULL NULL NULL 239 val_239 +NULL NULL NULL NULL 239 val_239 +NULL NULL NULL NULL 24 val_24 +NULL NULL NULL NULL 24 val_24 +NULL NULL NULL NULL 241 val_241 +NULL NULL NULL NULL 242 val_242 +NULL 
NULL NULL NULL 242 val_242 +NULL NULL NULL NULL 244 val_244 +NULL NULL NULL NULL 247 val_247 +NULL NULL NULL NULL 248 val_248 +NULL NULL NULL NULL 249 val_249 +NULL NULL NULL NULL 252 val_252 +NULL NULL NULL NULL 256 val_256 +NULL NULL NULL NULL 256 val_256 +NULL NULL NULL NULL 257 val_257 +NULL NULL NULL NULL 258 val_258 +NULL NULL NULL NULL 26 val_26 +NULL NULL NULL NULL 26 val_26 +NULL NULL NULL NULL 260 val_260 +NULL NULL NULL NULL 262 val_262 +NULL NULL NULL NULL 263 val_263 +NULL NULL NULL NULL 266 val_266 +NULL NULL NULL NULL 272 val_272 +NULL NULL NULL NULL 272 val_272 +NULL NULL NULL NULL 274 val_274 +NULL NULL NULL NULL 275 val_275 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 277 val_277 +NULL NULL NULL NULL 28 val_28 +NULL NULL NULL NULL 280 val_280 +NULL NULL NULL NULL 280 val_280 +NULL NULL NULL NULL 281 val_281 +NULL NULL NULL NULL 281 val_281 +NULL NULL NULL NULL 282 val_282 +NULL NULL NULL NULL 282 val_282 +NULL NULL NULL NULL 283 val_283 +NULL NULL NULL NULL 284 val_284 +NULL NULL NULL NULL 285 val_285 +NULL NULL NULL NULL 286 val_286 +NULL NULL NULL NULL 287 val_287 +NULL NULL NULL NULL 288 val_288 +NULL NULL NULL NULL 288 val_288 +NULL NULL NULL NULL 289 val_289 +NULL NULL NULL NULL 291 val_291 +NULL NULL NULL NULL 292 val_292 +NULL NULL NULL NULL 296 val_296 +NULL NULL NULL NULL 298 val_298 +NULL NULL NULL NULL 298 val_298 +NULL NULL NULL NULL 298 val_298 +NULL NULL NULL NULL 30 val_30 +NULL NULL NULL NULL 302 val_302 +NULL NULL NULL NULL 305 val_305 +NULL NULL NULL NULL 306 val_306 +NULL NULL NULL NULL 307 val_307 +NULL NULL NULL NULL 307 val_307 +NULL NULL NULL NULL 308 val_308 +NULL NULL NULL NULL 309 val_309 +NULL NULL NULL NULL 309 val_309 +NULL NULL NULL NULL 310 val_310 +NULL NULL NULL NULL 315 val_315 +NULL NULL NULL NULL 316 val_316 +NULL NULL NULL NULL 316 val_316 +NULL NULL NULL NULL 316 val_316 +NULL NULL NULL NULL 317 val_317 +NULL NULL NULL NULL 317 val_317 +NULL NULL NULL NULL 318 val_318 +NULL NULL NULL NULL 318 val_318 +NULL NULL NULL NULL 318 val_318 +NULL NULL NULL NULL 321 val_321 +NULL NULL NULL NULL 321 val_321 +NULL NULL NULL NULL 322 val_322 +NULL NULL NULL NULL 322 val_322 +NULL NULL NULL NULL 323 val_323 +NULL NULL NULL NULL 325 val_325 +NULL NULL NULL NULL 325 val_325 +NULL NULL NULL NULL 327 val_327 +NULL NULL NULL NULL 327 val_327 +NULL NULL NULL NULL 327 val_327 +NULL NULL NULL NULL 33 val_33 +NULL NULL NULL NULL 331 val_331 +NULL NULL NULL NULL 331 val_331 +NULL NULL NULL NULL 332 val_332 +NULL NULL NULL NULL 333 val_333 +NULL NULL NULL NULL 333 val_333 +NULL NULL NULL NULL 335 val_335 +NULL NULL NULL NULL 336 val_336 +NULL NULL NULL NULL 338 val_338 +NULL NULL NULL NULL 339 val_339 +NULL NULL NULL NULL 34 val_34 +NULL NULL NULL NULL 341 val_341 +NULL NULL NULL NULL 342 val_342 +NULL NULL NULL NULL 342 val_342 +NULL NULL NULL NULL 344 val_344 +NULL NULL NULL NULL 344 val_344 +NULL NULL NULL NULL 345 val_345 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 348 val_348 +NULL NULL NULL NULL 35 val_35 +NULL NULL NULL NULL 35 val_35 +NULL NULL NULL NULL 35 val_35 +NULL NULL NULL NULL 351 val_351 +NULL NULL NULL NULL 353 val_353 +NULL NULL NULL NULL 353 val_353 +NULL NULL NULL NULL 356 val_356 +NULL NULL NULL NULL 360 val_360 +NULL NULL NULL NULL 362 val_362 +NULL NULL NULL NULL 364 val_364 +NULL NULL NULL NULL 365 val_365 +NULL NULL NULL NULL 366 val_366 +NULL NULL 
NULL NULL 367 val_367 +NULL NULL NULL NULL 367 val_367 +NULL NULL NULL NULL 368 val_368 +NULL NULL NULL NULL 369 val_369 +NULL NULL NULL NULL 369 val_369 +NULL NULL NULL NULL 369 val_369 +NULL NULL NULL NULL 37 val_37 +NULL NULL NULL NULL 37 val_37 +NULL NULL NULL NULL 373 val_373 +NULL NULL NULL NULL 374 val_374 +NULL NULL NULL NULL 375 val_375 +NULL NULL NULL NULL 377 val_377 +NULL NULL NULL NULL 378 val_378 +NULL NULL NULL NULL 379 val_379 +NULL NULL NULL NULL 382 val_382 +NULL NULL NULL NULL 382 val_382 +NULL NULL NULL NULL 384 val_384 +NULL NULL NULL NULL 384 val_384 +NULL NULL NULL NULL 384 val_384 +NULL NULL NULL NULL 386 val_386 +NULL NULL NULL NULL 389 val_389 +NULL NULL NULL NULL 392 val_392 +NULL NULL NULL NULL 393 val_393 +NULL NULL NULL NULL 394 val_394 +NULL NULL NULL NULL 395 val_395 +NULL NULL NULL NULL 395 val_395 +NULL NULL NULL NULL 396 val_396 +NULL NULL NULL NULL 396 val_396 +NULL NULL NULL NULL 396 val_396 +NULL NULL NULL NULL 397 val_397 +NULL NULL NULL NULL 397 val_397 +NULL NULL NULL NULL 399 val_399 +NULL NULL NULL NULL 399 val_399 +NULL NULL NULL NULL 4 val_4 +NULL NULL NULL NULL 400 val_400 +NULL NULL NULL NULL 402 val_402 +NULL NULL NULL NULL 403 val_403 +NULL NULL NULL NULL 403 val_403 +NULL NULL NULL NULL 403 val_403 +NULL NULL NULL NULL 404 val_404 +NULL NULL NULL NULL 404 val_404 +NULL NULL NULL NULL 407 val_407 +NULL NULL NULL NULL 41 val_41 +NULL NULL NULL NULL 411 val_411 +NULL NULL NULL NULL 413 val_413 +NULL NULL NULL NULL 413 val_413 +NULL NULL NULL NULL 414 val_414 +NULL NULL NULL NULL 414 val_414 +NULL NULL NULL NULL 417 val_417 +NULL NULL NULL NULL 417 val_417 +NULL NULL NULL NULL 417 val_417 +NULL NULL NULL NULL 418 val_418 +NULL NULL NULL NULL 419 val_419 +NULL NULL NULL NULL 42 val_42 +NULL NULL NULL NULL 42 val_42 +NULL NULL NULL NULL 421 val_421 +NULL NULL NULL NULL 424 val_424 +NULL NULL NULL NULL 424 val_424 +NULL NULL NULL NULL 427 val_427 +NULL NULL NULL NULL 429 val_429 +NULL NULL NULL NULL 429 val_429 +NULL NULL NULL NULL 43 val_43 +NULL NULL NULL NULL 430 val_430 +NULL NULL NULL NULL 430 val_430 +NULL NULL NULL NULL 430 val_430 +NULL NULL NULL NULL 431 val_431 +NULL NULL NULL NULL 431 val_431 +NULL NULL NULL NULL 431 val_431 +NULL NULL NULL NULL 432 val_432 +NULL NULL NULL NULL 435 val_435 +NULL NULL NULL NULL 436 val_436 +NULL NULL NULL NULL 437 val_437 +NULL NULL NULL NULL 438 val_438 +NULL NULL NULL NULL 438 val_438 +NULL NULL NULL NULL 438 val_438 +NULL NULL NULL NULL 439 val_439 +NULL NULL NULL NULL 439 val_439 +NULL NULL NULL NULL 44 val_44 +NULL NULL NULL NULL 443 val_443 +NULL NULL NULL NULL 444 val_444 +NULL NULL NULL NULL 446 val_446 +NULL NULL NULL NULL 448 val_448 +NULL NULL NULL NULL 449 val_449 +NULL NULL NULL NULL 452 val_452 +NULL NULL NULL NULL 453 val_453 +NULL NULL NULL NULL 454 val_454 +NULL NULL NULL NULL 454 val_454 +NULL NULL NULL NULL 454 val_454 +NULL NULL NULL NULL 455 val_455 +NULL NULL NULL NULL 457 val_457 +NULL NULL NULL NULL 458 val_458 +NULL NULL NULL NULL 458 val_458 +NULL NULL NULL NULL 459 val_459 +NULL NULL NULL NULL 459 val_459 +NULL NULL NULL NULL 460 val_460 +NULL NULL NULL NULL 462 val_462 +NULL NULL NULL NULL 462 val_462 +NULL NULL NULL NULL 463 val_463 +NULL NULL NULL NULL 463 val_463 +NULL NULL NULL NULL 466 val_466 +NULL NULL NULL NULL 466 val_466 +NULL NULL NULL NULL 466 val_466 +NULL NULL NULL NULL 467 val_467 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 468 val_468 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL 
NULL 469 val_469 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 469 val_469 +NULL NULL NULL NULL 47 val_47 +NULL NULL NULL NULL 470 val_470 +NULL NULL NULL NULL 472 val_472 +NULL NULL NULL NULL 475 val_475 +NULL NULL NULL NULL 477 val_477 +NULL NULL NULL NULL 478 val_478 +NULL NULL NULL NULL 478 val_478 +NULL NULL NULL NULL 479 val_479 +NULL NULL NULL NULL 480 val_480 +NULL NULL NULL NULL 480 val_480 +NULL NULL NULL NULL 480 val_480 +NULL NULL NULL NULL 481 val_481 +NULL NULL NULL NULL 482 val_482 +NULL NULL NULL NULL 483 val_483 +NULL NULL NULL NULL 485 val_485 +NULL NULL NULL NULL 487 val_487 +NULL NULL NULL NULL 489 val_489 +NULL NULL NULL NULL 489 val_489 +NULL NULL NULL NULL 489 val_489 +NULL NULL NULL NULL 489 val_489 +NULL NULL NULL NULL 490 val_490 +NULL NULL NULL NULL 491 val_491 +NULL NULL NULL NULL 492 val_492 +NULL NULL NULL NULL 492 val_492 +NULL NULL NULL NULL 493 val_493 +NULL NULL NULL NULL 494 val_494 +NULL NULL NULL NULL 495 val_495 +NULL NULL NULL NULL 496 val_496 +NULL NULL NULL NULL 497 val_497 +NULL NULL NULL NULL 498 val_498 +NULL NULL NULL NULL 498 val_498 +NULL NULL NULL NULL 498 val_498 +NULL NULL NULL NULL 5 val_5 +NULL NULL NULL NULL 5 val_5 +NULL NULL NULL NULL 5 val_5 +NULL NULL NULL NULL 51 val_51 +NULL NULL NULL NULL 51 val_51 +NULL NULL NULL NULL 53 val_53 +NULL NULL NULL NULL 54 val_54 +NULL NULL NULL NULL 57 val_57 +NULL NULL NULL NULL 58 val_58 +NULL NULL NULL NULL 58 val_58 +NULL NULL NULL NULL 64 val_64 +NULL NULL NULL NULL 65 val_65 +NULL NULL NULL NULL 67 val_67 +NULL NULL NULL NULL 67 val_67 +NULL NULL NULL NULL 69 val_69 +NULL NULL NULL NULL 70 val_70 +NULL NULL NULL NULL 70 val_70 +NULL NULL NULL NULL 70 val_70 +NULL NULL NULL NULL 72 val_72 +NULL NULL NULL NULL 72 val_72 +NULL NULL NULL NULL 74 val_74 +NULL NULL NULL NULL 76 val_76 +NULL NULL NULL NULL 76 val_76 +NULL NULL NULL NULL 77 val_77 +NULL NULL NULL NULL 78 val_78 +NULL NULL NULL NULL 8 val_8 +NULL NULL NULL NULL 80 val_80 +NULL NULL NULL NULL 82 val_82 +NULL NULL NULL NULL 83 val_83 +NULL NULL NULL NULL 83 val_83 +NULL NULL NULL NULL 84 val_84 +NULL NULL NULL NULL 84 val_84 +NULL NULL NULL NULL 85 val_85 +NULL NULL NULL NULL 86 val_86 +NULL NULL NULL NULL 87 val_87 +NULL NULL NULL NULL 9 val_9 +NULL NULL NULL NULL 90 val_90 +NULL NULL NULL NULL 90 val_90 +NULL NULL NULL NULL 90 val_90 +NULL NULL NULL NULL 92 val_92 +NULL NULL NULL NULL 95 val_95 +NULL NULL NULL NULL 95 val_95 +NULL NULL NULL NULL 96 val_96 +NULL NULL NULL NULL 97 val_97 +NULL NULL NULL NULL 97 val_97 + val_165 NULL NULL 165 val_165 + val_165 NULL NULL 165 val_165 + val_193 NULL NULL 193 val_193 + val_193 NULL NULL 193 val_193 + val_193 NULL NULL 193 val_193 + val_265 NULL NULL 265 val_265 + val_265 NULL NULL 265 val_265 + val_27 NULL NULL 27 val_27 + val_409 NULL NULL 409 val_409 + val_409 NULL NULL 409 val_409 + val_409 NULL NULL 409 val_409 + val_484 NULL NULL 484 val_484 +146 val_146 146 val_146 146 val_146 +146 val_146 146 val_146 146 val_146 +146 val_146 146 val_146 146 val_146 +146 val_146 146 val_146 146 val_146 +150 val_150 150 val_150 150 val_150 +213 val_213 213 val_213 213 val_213 +213 val_213 213 val_213 213 val_213 +213 val_213 213 val_213 213 val_213 +213 val_213 213 val_213 213 val_213 +238 val_238 238 val_238 238 val_238 +238 val_238 238 val_238 238 val_238 +238 val_238 238 val_238 238 val_238 +238 val_238 238 val_238 238 val_238 +255 val_255 255 val_255 255 val_255 +255 val_255 255 val_255 255 val_255 +255 val_255 255 val_255 255 val_255 +255 val_255 255 val_255 
255 val_255 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +278 val_278 278 val_278 278 val_278 +278 val_278 278 val_278 278 val_278 +278 val_278 278 val_278 278 val_278 +278 val_278 278 val_278 278 val_278 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +66 val_66 66 val_66 66 val_66 +98 val_98 98 val_98 98 val_98 +98 val_98 98 val_98 98 val_98 +98 val_98 98 val_98 98 val_98 +98 val_98 98 val_98 98 val_98 +PREHOOK: query: select * from src1 a left outer join src b on (a.key = b.key) left outer join src c on (a.value = c.value) order by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select * from src1 a left outer join src b on (a.key = b.key) left outer join src c on (a.value = c.value) order by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### + NULL NULL NULL NULL + NULL NULL NULL NULL + NULL NULL NULL NULL + NULL NULL NULL NULL + val_165 NULL NULL 165 val_165 + val_165 NULL NULL 165 val_165 + val_193 NULL NULL 193 val_193 + val_193 NULL NULL 193 val_193 + val_193 NULL NULL 193 val_193 + val_265 NULL NULL 265 val_265 + val_265 NULL NULL 265 val_265 + val_27 NULL NULL 27 val_27 + val_409 NULL NULL 409 val_409 + val_409 NULL NULL 409 val_409 + val_409 NULL NULL 409 val_409 + val_484 NULL NULL 484 val_484 +128 128 val_128 NULL NULL +128 128 val_128 NULL NULL +128 128 
val_128 NULL NULL +146 val_146 146 val_146 146 val_146 +146 val_146 146 val_146 146 val_146 +146 val_146 146 val_146 146 val_146 +146 val_146 146 val_146 146 val_146 +150 val_150 150 val_150 150 val_150 +213 val_213 213 val_213 213 val_213 +213 val_213 213 val_213 213 val_213 +213 val_213 213 val_213 213 val_213 +213 val_213 213 val_213 213 val_213 +224 224 val_224 NULL NULL +224 224 val_224 NULL NULL +238 val_238 238 val_238 238 val_238 +238 val_238 238 val_238 238 val_238 +238 val_238 238 val_238 238 val_238 +238 val_238 238 val_238 238 val_238 +255 val_255 255 val_255 255 val_255 +255 val_255 255 val_255 255 val_255 +255 val_255 255 val_255 255 val_255 +255 val_255 255 val_255 255 val_255 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +278 val_278 278 val_278 278 val_278 +278 val_278 278 val_278 278 val_278 +278 val_278 278 val_278 278 val_278 +278 val_278 278 val_278 278 val_278 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +369 369 val_369 NULL NULL +369 369 val_369 NULL NULL +369 369 val_369 NULL NULL +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +66 val_66 66 val_66 66 val_66 +98 val_98 98 val_98 98 val_98 +98 val_98 98 val_98 98 val_98 +98 val_98 98 val_98 98 val_98 +98 val_98 98 val_98 98 val_98 +PREHOOK: query: select * from src1 a left outer join src b on (a.key = b.key) join src c on (a.key = c.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select * from src1 a left outer join src 
b on (a.key = b.key) join src c on (a.key = c.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 128 val_128 128 val_128 +128 128 val_128 128 val_128 +128 128 val_128 128 val_128 +128 128 val_128 128 val_128 +128 128 val_128 128 val_128 +128 128 val_128 128 val_128 +128 128 val_128 128 val_128 +128 128 val_128 128 val_128 +128 128 val_128 128 val_128 +146 val_146 146 val_146 146 val_146 +146 val_146 146 val_146 146 val_146 +146 val_146 146 val_146 146 val_146 +146 val_146 146 val_146 146 val_146 +150 val_150 150 val_150 150 val_150 +213 val_213 213 val_213 213 val_213 +213 val_213 213 val_213 213 val_213 +213 val_213 213 val_213 213 val_213 +213 val_213 213 val_213 213 val_213 +224 224 val_224 224 val_224 +224 224 val_224 224 val_224 +224 224 val_224 224 val_224 +224 224 val_224 224 val_224 +238 val_238 238 val_238 238 val_238 +238 val_238 238 val_238 238 val_238 +238 val_238 238 val_238 238 val_238 +238 val_238 238 val_238 238 val_238 +255 val_255 255 val_255 255 val_255 +255 val_255 255 val_255 255 val_255 +255 val_255 255 val_255 255 val_255 +255 val_255 255 val_255 255 val_255 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +278 val_278 278 val_278 278 val_278 +278 val_278 278 val_278 278 val_278 +278 val_278 278 val_278 278 val_278 +278 val_278 278 val_278 278 val_278 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +369 369 val_369 369 val_369 +369 369 val_369 369 val_369 +369 369 val_369 369 val_369 +369 369 val_369 369 val_369 +369 369 val_369 369 val_369 +369 369 val_369 369 val_369 +369 369 val_369 369 val_369 +369 369 val_369 369 val_369 +369 369 val_369 369 val_369 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 
val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +66 val_66 66 val_66 66 val_66 +98 val_98 98 val_98 98 val_98 +98 val_98 98 val_98 98 val_98 +98 val_98 98 val_98 98 val_98 +98 val_98 98 val_98 98 val_98 +PREHOOK: query: select * from src1 a join src b on (a.key = b.key) join src c on (a.key = c.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select * from src1 a join src b on (a.key = b.key) join src c on (a.key = c.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +128 128 val_128 128 val_128 +128 128 val_128 128 val_128 +128 128 val_128 128 val_128 +128 128 val_128 128 val_128 +128 128 val_128 128 val_128 +128 128 val_128 128 val_128 +128 128 val_128 128 val_128 +128 128 val_128 128 val_128 +128 128 val_128 128 val_128 +146 val_146 146 val_146 146 val_146 +146 val_146 146 val_146 146 val_146 +146 val_146 146 val_146 146 val_146 +146 val_146 146 val_146 146 val_146 +150 val_150 150 val_150 150 val_150 +213 val_213 213 val_213 213 val_213 +213 val_213 213 val_213 213 val_213 +213 val_213 213 val_213 213 val_213 +213 val_213 213 val_213 213 val_213 +224 224 val_224 224 val_224 +224 224 val_224 224 val_224 +224 224 val_224 224 val_224 +224 224 val_224 224 val_224 +238 val_238 238 val_238 238 val_238 +238 val_238 238 val_238 238 val_238 +238 val_238 238 val_238 238 val_238 +238 val_238 238 val_238 238 val_238 +255 val_255 255 val_255 255 val_255 +255 val_255 255 val_255 255 val_255 +255 val_255 255 val_255 255 val_255 +255 val_255 255 val_255 255 val_255 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +273 val_273 273 val_273 273 val_273 +278 val_278 278 val_278 278 val_278 +278 val_278 278 val_278 278 val_278 +278 val_278 278 val_278 278 val_278 +278 val_278 278 val_278 278 val_278 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +311 val_311 311 val_311 311 val_311 +369 369 val_369 369 val_369 +369 369 val_369 369 val_369 +369 369 val_369 369 val_369 +369 369 val_369 369 val_369 +369 369 val_369 369 val_369 +369 369 val_369 369 val_369 +369 369 val_369 369 val_369 +369 369 val_369 369 val_369 +369 369 val_369 369 val_369 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 
+401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +401 val_401 401 val_401 401 val_401 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +406 val_406 406 val_406 406 val_406 +66 val_66 66 val_66 66 val_66 +98 val_98 98 val_98 98 val_98 +98 val_98 98 val_98 98 val_98 +98 val_98 98 val_98 98 val_98 +98 val_98 98 val_98 98 val_98 +PREHOOK: query: select count(*) from src1 a join src b on (a.key = b.key) join src c on (a.key = c.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from src1 a join src b on (a.key = b.key) join src c on (a.key = c.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +107 diff --git ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out new file mode 100644 index 0000000..d2b23ad --- /dev/null +++ ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out @@ -0,0 +1,702 @@ +PREHOOK: query: explain +select * from (select b.key, b.value from src1 a left outer join src b on (a.key = b.key) order by b.key) x right outer join src c on (x.value = c.value) order by x.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from (select b.key, b.value from src1 a left outer join src b on (a.key = b.key) order by b.key) x right outer join src c on (x.value = c.value) order by x.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 7 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) + Reducer 4 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 3 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 4 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} + 1 {VALUE._col0} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from (select b.key, b.value from src1 a left outer join src b on (a.key = b.key) order by b.key) x right outer join src c on (x.value = c.value) order by x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select * from (select b.key, b.value from src1 a left outer join src b on (a.key = b.key) order by b.key) x right outer join src c on (x.value = c.value) order by x.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: 
default@src1 +#### A masked pattern was here #### +NULL NULL 0 val_0 +NULL NULL 0 val_0 +NULL NULL 0 val_0 +NULL NULL 10 val_10 +NULL NULL 100 val_100 +NULL NULL 100 val_100 +NULL NULL 103 val_103 +NULL NULL 103 val_103 +NULL NULL 104 val_104 +NULL NULL 104 val_104 +NULL NULL 105 val_105 +NULL NULL 11 val_11 +NULL NULL 111 val_111 +NULL NULL 113 val_113 +NULL NULL 113 val_113 +NULL NULL 114 val_114 +NULL NULL 116 val_116 +NULL NULL 118 val_118 +NULL NULL 118 val_118 +NULL NULL 119 val_119 +NULL NULL 119 val_119 +NULL NULL 119 val_119 +NULL NULL 12 val_12 +NULL NULL 12 val_12 +NULL NULL 120 val_120 +NULL NULL 120 val_120 +NULL NULL 125 val_125 +NULL NULL 125 val_125 +NULL NULL 126 val_126 +NULL NULL 129 val_129 +NULL NULL 129 val_129 +NULL NULL 131 val_131 +NULL NULL 133 val_133 +NULL NULL 134 val_134 +NULL NULL 134 val_134 +NULL NULL 136 val_136 +NULL NULL 137 val_137 +NULL NULL 137 val_137 +NULL NULL 138 val_138 +NULL NULL 138 val_138 +NULL NULL 138 val_138 +NULL NULL 138 val_138 +NULL NULL 143 val_143 +NULL NULL 145 val_145 +NULL NULL 149 val_149 +NULL NULL 149 val_149 +NULL NULL 15 val_15 +NULL NULL 15 val_15 +NULL NULL 152 val_152 +NULL NULL 152 val_152 +NULL NULL 153 val_153 +NULL NULL 155 val_155 +NULL NULL 156 val_156 +NULL NULL 157 val_157 +NULL NULL 158 val_158 +NULL NULL 160 val_160 +NULL NULL 162 val_162 +NULL NULL 163 val_163 +NULL NULL 164 val_164 +NULL NULL 164 val_164 +NULL NULL 165 val_165 +NULL NULL 165 val_165 +NULL NULL 166 val_166 +NULL NULL 167 val_167 +NULL NULL 167 val_167 +NULL NULL 167 val_167 +NULL NULL 168 val_168 +NULL NULL 169 val_169 +NULL NULL 169 val_169 +NULL NULL 169 val_169 +NULL NULL 169 val_169 +NULL NULL 17 val_17 +NULL NULL 170 val_170 +NULL NULL 172 val_172 +NULL NULL 172 val_172 +NULL NULL 174 val_174 +NULL NULL 174 val_174 +NULL NULL 175 val_175 +NULL NULL 175 val_175 +NULL NULL 176 val_176 +NULL NULL 176 val_176 +NULL NULL 177 val_177 +NULL NULL 178 val_178 +NULL NULL 179 val_179 +NULL NULL 179 val_179 +NULL NULL 18 val_18 +NULL NULL 18 val_18 +NULL NULL 180 val_180 +NULL NULL 181 val_181 +NULL NULL 183 val_183 +NULL NULL 186 val_186 +NULL NULL 187 val_187 +NULL NULL 187 val_187 +NULL NULL 187 val_187 +NULL NULL 189 val_189 +NULL NULL 19 val_19 +NULL NULL 190 val_190 +NULL NULL 191 val_191 +NULL NULL 191 val_191 +NULL NULL 192 val_192 +NULL NULL 193 val_193 +NULL NULL 193 val_193 +NULL NULL 193 val_193 +NULL NULL 194 val_194 +NULL NULL 195 val_195 +NULL NULL 195 val_195 +NULL NULL 196 val_196 +NULL NULL 197 val_197 +NULL NULL 197 val_197 +NULL NULL 199 val_199 +NULL NULL 199 val_199 +NULL NULL 199 val_199 +NULL NULL 2 val_2 +NULL NULL 20 val_20 +NULL NULL 200 val_200 +NULL NULL 200 val_200 +NULL NULL 201 val_201 +NULL NULL 202 val_202 +NULL NULL 203 val_203 +NULL NULL 203 val_203 +NULL NULL 205 val_205 +NULL NULL 205 val_205 +NULL NULL 207 val_207 +NULL NULL 207 val_207 +NULL NULL 208 val_208 +NULL NULL 208 val_208 +NULL NULL 208 val_208 +NULL NULL 209 val_209 +NULL NULL 209 val_209 +NULL NULL 214 val_214 +NULL NULL 216 val_216 +NULL NULL 216 val_216 +NULL NULL 217 val_217 +NULL NULL 217 val_217 +NULL NULL 218 val_218 +NULL NULL 219 val_219 +NULL NULL 219 val_219 +NULL NULL 221 val_221 +NULL NULL 221 val_221 +NULL NULL 222 val_222 +NULL NULL 223 val_223 +NULL NULL 223 val_223 +NULL NULL 226 val_226 +NULL NULL 228 val_228 +NULL NULL 229 val_229 +NULL NULL 229 val_229 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 230 val_230 +NULL NULL 233 val_233 +NULL NULL 233 val_233 +NULL NULL 235 val_235 
+NULL NULL 237 val_237 +NULL NULL 237 val_237 +NULL NULL 239 val_239 +NULL NULL 239 val_239 +NULL NULL 24 val_24 +NULL NULL 24 val_24 +NULL NULL 241 val_241 +NULL NULL 242 val_242 +NULL NULL 242 val_242 +NULL NULL 244 val_244 +NULL NULL 247 val_247 +NULL NULL 248 val_248 +NULL NULL 249 val_249 +NULL NULL 252 val_252 +NULL NULL 256 val_256 +NULL NULL 256 val_256 +NULL NULL 257 val_257 +NULL NULL 258 val_258 +NULL NULL 26 val_26 +NULL NULL 26 val_26 +NULL NULL 260 val_260 +NULL NULL 262 val_262 +NULL NULL 263 val_263 +NULL NULL 265 val_265 +NULL NULL 265 val_265 +NULL NULL 266 val_266 +NULL NULL 27 val_27 +NULL NULL 272 val_272 +NULL NULL 272 val_272 +NULL NULL 274 val_274 +NULL NULL 275 val_275 +NULL NULL 277 val_277 +NULL NULL 277 val_277 +NULL NULL 277 val_277 +NULL NULL 277 val_277 +NULL NULL 28 val_28 +NULL NULL 280 val_280 +NULL NULL 280 val_280 +NULL NULL 281 val_281 +NULL NULL 281 val_281 +NULL NULL 282 val_282 +NULL NULL 282 val_282 +NULL NULL 283 val_283 +NULL NULL 284 val_284 +NULL NULL 285 val_285 +NULL NULL 286 val_286 +NULL NULL 287 val_287 +NULL NULL 288 val_288 +NULL NULL 288 val_288 +NULL NULL 289 val_289 +NULL NULL 291 val_291 +NULL NULL 292 val_292 +NULL NULL 296 val_296 +NULL NULL 298 val_298 +NULL NULL 298 val_298 +NULL NULL 298 val_298 +NULL NULL 30 val_30 +NULL NULL 302 val_302 +NULL NULL 305 val_305 +NULL NULL 306 val_306 +NULL NULL 307 val_307 +NULL NULL 307 val_307 +NULL NULL 308 val_308 +NULL NULL 309 val_309 +NULL NULL 309 val_309 +NULL NULL 310 val_310 +NULL NULL 315 val_315 +NULL NULL 316 val_316 +NULL NULL 316 val_316 +NULL NULL 316 val_316 +NULL NULL 317 val_317 +NULL NULL 317 val_317 +NULL NULL 318 val_318 +NULL NULL 318 val_318 +NULL NULL 318 val_318 +NULL NULL 321 val_321 +NULL NULL 321 val_321 +NULL NULL 322 val_322 +NULL NULL 322 val_322 +NULL NULL 323 val_323 +NULL NULL 325 val_325 +NULL NULL 325 val_325 +NULL NULL 327 val_327 +NULL NULL 327 val_327 +NULL NULL 327 val_327 +NULL NULL 33 val_33 +NULL NULL 331 val_331 +NULL NULL 331 val_331 +NULL NULL 332 val_332 +NULL NULL 333 val_333 +NULL NULL 333 val_333 +NULL NULL 335 val_335 +NULL NULL 336 val_336 +NULL NULL 338 val_338 +NULL NULL 339 val_339 +NULL NULL 34 val_34 +NULL NULL 341 val_341 +NULL NULL 342 val_342 +NULL NULL 342 val_342 +NULL NULL 344 val_344 +NULL NULL 344 val_344 +NULL NULL 345 val_345 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 348 val_348 +NULL NULL 35 val_35 +NULL NULL 35 val_35 +NULL NULL 35 val_35 +NULL NULL 351 val_351 +NULL NULL 353 val_353 +NULL NULL 353 val_353 +NULL NULL 356 val_356 +NULL NULL 360 val_360 +NULL NULL 362 val_362 +NULL NULL 364 val_364 +NULL NULL 365 val_365 +NULL NULL 366 val_366 +NULL NULL 367 val_367 +NULL NULL 367 val_367 +NULL NULL 368 val_368 +NULL NULL 37 val_37 +NULL NULL 37 val_37 +NULL NULL 373 val_373 +NULL NULL 374 val_374 +NULL NULL 375 val_375 +NULL NULL 377 val_377 +NULL NULL 378 val_378 +NULL NULL 379 val_379 +NULL NULL 382 val_382 +NULL NULL 382 val_382 +NULL NULL 384 val_384 +NULL NULL 384 val_384 +NULL NULL 384 val_384 +NULL NULL 386 val_386 +NULL NULL 389 val_389 +NULL NULL 392 val_392 +NULL NULL 393 val_393 +NULL NULL 394 val_394 +NULL NULL 395 val_395 +NULL NULL 395 val_395 +NULL NULL 396 val_396 +NULL NULL 396 val_396 +NULL NULL 396 val_396 +NULL NULL 397 val_397 +NULL NULL 397 val_397 +NULL NULL 399 val_399 +NULL NULL 399 val_399 +NULL NULL 4 val_4 +NULL NULL 400 val_400 +NULL NULL 402 val_402 +NULL NULL 403 val_403 +NULL NULL 403 val_403 +NULL NULL 403 val_403 +NULL NULL 404 
val_404 +NULL NULL 404 val_404 +NULL NULL 407 val_407 +NULL NULL 409 val_409 +NULL NULL 409 val_409 +NULL NULL 409 val_409 +NULL NULL 41 val_41 +NULL NULL 411 val_411 +NULL NULL 413 val_413 +NULL NULL 413 val_413 +NULL NULL 414 val_414 +NULL NULL 414 val_414 +NULL NULL 417 val_417 +NULL NULL 417 val_417 +NULL NULL 417 val_417 +NULL NULL 418 val_418 +NULL NULL 419 val_419 +NULL NULL 42 val_42 +NULL NULL 42 val_42 +NULL NULL 421 val_421 +NULL NULL 424 val_424 +NULL NULL 424 val_424 +NULL NULL 427 val_427 +NULL NULL 429 val_429 +NULL NULL 429 val_429 +NULL NULL 43 val_43 +NULL NULL 430 val_430 +NULL NULL 430 val_430 +NULL NULL 430 val_430 +NULL NULL 431 val_431 +NULL NULL 431 val_431 +NULL NULL 431 val_431 +NULL NULL 432 val_432 +NULL NULL 435 val_435 +NULL NULL 436 val_436 +NULL NULL 437 val_437 +NULL NULL 438 val_438 +NULL NULL 438 val_438 +NULL NULL 438 val_438 +NULL NULL 439 val_439 +NULL NULL 439 val_439 +NULL NULL 44 val_44 +NULL NULL 443 val_443 +NULL NULL 444 val_444 +NULL NULL 446 val_446 +NULL NULL 448 val_448 +NULL NULL 449 val_449 +NULL NULL 452 val_452 +NULL NULL 453 val_453 +NULL NULL 454 val_454 +NULL NULL 454 val_454 +NULL NULL 454 val_454 +NULL NULL 455 val_455 +NULL NULL 457 val_457 +NULL NULL 458 val_458 +NULL NULL 458 val_458 +NULL NULL 459 val_459 +NULL NULL 459 val_459 +NULL NULL 460 val_460 +NULL NULL 462 val_462 +NULL NULL 462 val_462 +NULL NULL 463 val_463 +NULL NULL 463 val_463 +NULL NULL 466 val_466 +NULL NULL 466 val_466 +NULL NULL 466 val_466 +NULL NULL 467 val_467 +NULL NULL 468 val_468 +NULL NULL 468 val_468 +NULL NULL 468 val_468 +NULL NULL 468 val_468 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 469 val_469 +NULL NULL 47 val_47 +NULL NULL 470 val_470 +NULL NULL 472 val_472 +NULL NULL 475 val_475 +NULL NULL 477 val_477 +NULL NULL 478 val_478 +NULL NULL 478 val_478 +NULL NULL 479 val_479 +NULL NULL 480 val_480 +NULL NULL 480 val_480 +NULL NULL 480 val_480 +NULL NULL 481 val_481 +NULL NULL 482 val_482 +NULL NULL 483 val_483 +NULL NULL 484 val_484 +NULL NULL 485 val_485 +NULL NULL 487 val_487 +NULL NULL 489 val_489 +NULL NULL 489 val_489 +NULL NULL 489 val_489 +NULL NULL 489 val_489 +NULL NULL 490 val_490 +NULL NULL 491 val_491 +NULL NULL 492 val_492 +NULL NULL 492 val_492 +NULL NULL 493 val_493 +NULL NULL 494 val_494 +NULL NULL 495 val_495 +NULL NULL 496 val_496 +NULL NULL 497 val_497 +NULL NULL 498 val_498 +NULL NULL 498 val_498 +NULL NULL 498 val_498 +NULL NULL 5 val_5 +NULL NULL 5 val_5 +NULL NULL 5 val_5 +NULL NULL 51 val_51 +NULL NULL 51 val_51 +NULL NULL 53 val_53 +NULL NULL 54 val_54 +NULL NULL 57 val_57 +NULL NULL 58 val_58 +NULL NULL 58 val_58 +NULL NULL 64 val_64 +NULL NULL 65 val_65 +NULL NULL 67 val_67 +NULL NULL 67 val_67 +NULL NULL 69 val_69 +NULL NULL 70 val_70 +NULL NULL 70 val_70 +NULL NULL 70 val_70 +NULL NULL 72 val_72 +NULL NULL 72 val_72 +NULL NULL 74 val_74 +NULL NULL 76 val_76 +NULL NULL 76 val_76 +NULL NULL 77 val_77 +NULL NULL 78 val_78 +NULL NULL 8 val_8 +NULL NULL 80 val_80 +NULL NULL 82 val_82 +NULL NULL 83 val_83 +NULL NULL 83 val_83 +NULL NULL 84 val_84 +NULL NULL 84 val_84 +NULL NULL 85 val_85 +NULL NULL 86 val_86 +NULL NULL 87 val_87 +NULL NULL 9 val_9 +NULL NULL 90 val_90 +NULL NULL 90 val_90 +NULL NULL 90 val_90 +NULL NULL 92 val_92 +NULL NULL 95 val_95 +NULL NULL 95 val_95 +NULL NULL 96 val_96 +NULL NULL 97 val_97 +NULL NULL 97 val_97 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 
128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +128 val_128 128 val_128 +146 val_146 146 val_146 +146 val_146 146 val_146 +146 val_146 146 val_146 +146 val_146 146 val_146 +150 val_150 150 val_150 +213 val_213 213 val_213 +213 val_213 213 val_213 +213 val_213 213 val_213 +213 val_213 213 val_213 +224 val_224 224 val_224 +224 val_224 224 val_224 +224 val_224 224 val_224 +224 val_224 224 val_224 +238 val_238 238 val_238 +238 val_238 238 val_238 +238 val_238 238 val_238 +238 val_238 238 val_238 +255 val_255 255 val_255 +255 val_255 255 val_255 +255 val_255 255 val_255 +255 val_255 255 val_255 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +273 val_273 273 val_273 +278 val_278 278 val_278 +278 val_278 278 val_278 +278 val_278 278 val_278 +278 val_278 278 val_278 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +311 val_311 311 val_311 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +369 val_369 369 val_369 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +401 val_401 401 val_401 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +406 val_406 406 val_406 +66 val_66 66 val_66 +98 val_98 98 val_98 +98 val_98 98 val_98 +98 val_98 98 val_98 +98 val_98 98 val_98 diff --git ql/src/test/results/clientpositive/spark/uniquejoin.q.out ql/src/test/results/clientpositive/spark/uniquejoin.q.out new file mode 100644 index 0000000..b6d3553 --- /dev/null +++ ql/src/test/results/clientpositive/spark/uniquejoin.q.out @@ -0,0 +1,174 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: 
CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +POSTHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t3 +PREHOOK: query: FROM UNIQUEJOIN PRESERVE T1 a (a.key), PRESERVE T2 b (b.key), PRESERVE T3 c (c.key) +SELECT a.key, b.key, c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: FROM UNIQUEJOIN PRESERVE T1 a (a.key), PRESERVE T2 b (b.key), PRESERVE T3 c (c.key) +SELECT a.key, b.key, c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +1 NULL NULL +2 2 2 +3 3 NULL +NULL 4 4 +NULL 5 NULL +NULL NULL 6 +7 NULL 7 +8 8 NULL +8 8 NULL +8 8 NULL +8 8 NULL +PREHOOK: query: FROM UNIQUEJOIN T1 a (a.key), T2 b (b.key), T3 c (c.key) +SELECT a.key, b.key, c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: FROM UNIQUEJOIN T1 a (a.key), T2 b (b.key), T3 c (c.key) +SELECT a.key, b.key, c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +2 2 2 +PREHOOK: query: FROM UNIQUEJOIN T1 a (a.key), T2 b (b.key-1), T3 c (c.key) +SELECT a.key, b.key, c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: FROM UNIQUEJOIN T1 a (a.key), T2 b (b.key-1), T3 c (c.key) +SELECT a.key, b.key, c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +2 3 2 +7 8 7 +7 8 7 +PREHOOK: query: FROM UNIQUEJOIN PRESERVE T1 a (a.key, a.val), PRESERVE T2 b (b.key, b.val), PRESERVE T3 c (c.key, c.val) +SELECT a.key, a.val, b.key, b.val, c.key, c.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: FROM UNIQUEJOIN PRESERVE T1 a (a.key, a.val), PRESERVE T2 b (b.key, b.val), PRESERVE T3 c (c.key, c.val) +SELECT a.key, a.val, b.key, b.val, c.key, c.val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A 
masked pattern was here #### +1 11 NULL NULL NULL NULL +2 12 NULL NULL 2 12 +NULL NULL 2 22 NULL NULL +3 13 3 13 NULL NULL +NULL NULL 4 14 4 14 +NULL NULL 5 15 NULL NULL +NULL NULL NULL NULL 6 16 +7 17 NULL NULL 7 17 +8 18 8 18 NULL NULL +8 18 8 18 NULL NULL +8 28 NULL NULL NULL NULL +PREHOOK: query: FROM UNIQUEJOIN PRESERVE T1 a (a.key), T2 b (b.key), PRESERVE T3 c (c.key) +SELECT a.key, b.key, c.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: FROM UNIQUEJOIN PRESERVE T1 a (a.key), T2 b (b.key), PRESERVE T3 c (c.key) +SELECT a.key, b.key, c.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +1 NULL NULL +2 2 2 +3 3 NULL +NULL 4 4 +NULL NULL 6 +7 NULL 7 +8 8 NULL +8 8 NULL +8 8 NULL +8 8 NULL +PREHOOK: query: FROM UNIQUEJOIN PRESERVE T1 a (a.key), T2 b(b.key) +SELECT a.key, b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: FROM UNIQUEJOIN PRESERVE T1 a (a.key), T2 b(b.key) +SELECT a.key, b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +1 NULL +2 2 +3 3 +7 NULL +8 8 +8 8 +8 8 +8 8 diff --git ql/src/test/results/clientpositive/spark/varchar_join1.q.out ql/src/test/results/clientpositive/spark/varchar_join1.q.out new file mode 100644 index 0000000..9736fb1 --- /dev/null +++ ql/src/test/results/clientpositive/spark/varchar_join1.q.out @@ -0,0 +1,145 @@ +PREHOOK: query: drop table varchar_join1_vc1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_join1_vc1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table varchar_join1_vc2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_join1_vc2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table varchar_join1_str +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_join1_str +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table varchar_join1_vc1 ( + c1 int, + c2 varchar(10) +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@varchar_join1_vc1 +POSTHOOK: query: create table varchar_join1_vc1 ( + c1 int, + c2 varchar(10) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@varchar_join1_vc1 +PREHOOK: query: create table varchar_join1_vc2 ( + c1 int, + c2 varchar(20) +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@varchar_join1_vc2 +POSTHOOK: query: create table varchar_join1_vc2 ( + c1 int, + c2 varchar(20) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@varchar_join1_vc2 +PREHOOK: query: create table varchar_join1_str ( + c1 int, + c2 string +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@varchar_join1_str +POSTHOOK: query: create table varchar_join1_str ( + c1 int, + c2 string +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@varchar_join1_str +PREHOOK: query: load data local inpath '../../data/files/vc1.txt' into table varchar_join1_vc1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@varchar_join1_vc1 +POSTHOOK: query: load data local inpath '../../data/files/vc1.txt' into table varchar_join1_vc1 +POSTHOOK: type: LOAD +#### A masked pattern 
was here #### +POSTHOOK: Output: default@varchar_join1_vc1 +PREHOOK: query: load data local inpath '../../data/files/vc1.txt' into table varchar_join1_vc2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@varchar_join1_vc2 +POSTHOOK: query: load data local inpath '../../data/files/vc1.txt' into table varchar_join1_vc2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@varchar_join1_vc2 +PREHOOK: query: load data local inpath '../../data/files/vc1.txt' into table varchar_join1_str +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@varchar_join1_str +POSTHOOK: query: load data local inpath '../../data/files/vc1.txt' into table varchar_join1_str +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@varchar_join1_str +PREHOOK: query: -- Join varchar with same length varchar +select * from varchar_join1_vc1 a join varchar_join1_vc1 b on (a.c2 = b.c2) order by a.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_join1_vc1 +#### A masked pattern was here #### +POSTHOOK: query: -- Join varchar with same length varchar +select * from varchar_join1_vc1 a join varchar_join1_vc1 b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_join1_vc1 +#### A masked pattern was here #### +1 abc 1 abc +2 abc 2 abc +3 abc 3 abc +PREHOOK: query: -- Join varchar with different length varchar +select * from varchar_join1_vc1 a join varchar_join1_vc2 b on (a.c2 = b.c2) order by a.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_join1_vc1 +PREHOOK: Input: default@varchar_join1_vc2 +#### A masked pattern was here #### +POSTHOOK: query: -- Join varchar with different length varchar +select * from varchar_join1_vc1 a join varchar_join1_vc2 b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_join1_vc1 +POSTHOOK: Input: default@varchar_join1_vc2 +#### A masked pattern was here #### +1 abc 1 abc +2 abc 2 abc +3 abc 3 abc +PREHOOK: query: -- Join varchar with string +select * from varchar_join1_vc1 a join varchar_join1_str b on (a.c2 = b.c2) order by a.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_join1_str +PREHOOK: Input: default@varchar_join1_vc1 +#### A masked pattern was here #### +POSTHOOK: query: -- Join varchar with string +select * from varchar_join1_vc1 a join varchar_join1_str b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_join1_str +POSTHOOK: Input: default@varchar_join1_vc1 +#### A masked pattern was here #### +1 abc 1 abc +2 abc 2 abc +3 abc 3 abc +PREHOOK: query: drop table varchar_join1_vc1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_join1_vc1 +PREHOOK: Output: default@varchar_join1_vc1 +POSTHOOK: query: drop table varchar_join1_vc1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_join1_vc1 +POSTHOOK: Output: default@varchar_join1_vc1 +PREHOOK: query: drop table varchar_join1_vc2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_join1_vc2 +PREHOOK: Output: default@varchar_join1_vc2 +POSTHOOK: query: drop table varchar_join1_vc2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_join1_vc2 +POSTHOOK: Output: default@varchar_join1_vc2 +PREHOOK: query: drop table varchar_join1_str +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_join1_str +PREHOOK: Output: default@varchar_join1_str +POSTHOOK: query: drop table varchar_join1_str +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: 
default@varchar_join1_str +POSTHOOK: Output: default@varchar_join1_str
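For reference, each new .q.out golden file above is produced by replaying a plain HiveQL query file against the test warehouse. A sketch of the driver behind tez_joins_explain.q.out, reconstructed from the PREHOOK/POSTHOOK query lines echoed in the output (the actual .q file may also set engine properties that are not echoed into the golden file):

-- tez_joins_explain.q (sketch; queries copied from the golden output above,
-- assuming the standard qtest tables src and src1 are pre-loaded)
explain
select * from (select b.key, b.value from src1 a left outer join src b on (a.key = b.key) order by b.key) x
right outer join src c on (x.value = c.value) order by x.key;

select * from (select b.key, b.value from src1 a left outer join src b on (a.key = b.key) order by b.key) x
right outer join src c on (x.value = c.value) order by x.key;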
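Similarly, a sketch of the driver behind uniquejoin.q.out; the DDL, LOAD statements, and queries are copied verbatim from the golden output, and the comment on PRESERVE semantics is inferred from the result rows shown above:

-- uniquejoin.q (sketch assembled from the golden output above)
CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE;
CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE;
CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE;

LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2;
LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3;

-- PRESERVE keeps a table's rows even when its key has no match in the other
-- tables (NULLs fill the gaps); without PRESERVE, only keys present in every
-- table survive, which is why the second query returns just "2 2 2".
FROM UNIQUEJOIN PRESERVE T1 a (a.key), PRESERVE T2 b (b.key), PRESERVE T3 c (c.key)
SELECT a.key, b.key, c.key;

FROM UNIQUEJOIN T1 a (a.key), T2 b (b.key), T3 c (c.key)
SELECT a.key, b.key, c.key;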
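And a sketch of the driver behind varchar_join1.q.out, again assembled from the statements echoed in the golden output; it exercises joins between varchar columns of equal length, different lengths, and varchar-to-string:

-- varchar_join1.q (sketch; statements copied from the golden output above)
drop table varchar_join1_vc1;
drop table varchar_join1_vc2;
drop table varchar_join1_str;

create table varchar_join1_vc1 (c1 int, c2 varchar(10));
create table varchar_join1_vc2 (c1 int, c2 varchar(20));
create table varchar_join1_str (c1 int, c2 string);

load data local inpath '../../data/files/vc1.txt' into table varchar_join1_vc1;
load data local inpath '../../data/files/vc1.txt' into table varchar_join1_vc2;
load data local inpath '../../data/files/vc1.txt' into table varchar_join1_str;

-- Join varchar with same length varchar
select * from varchar_join1_vc1 a join varchar_join1_vc1 b on (a.c2 = b.c2) order by a.c1;
-- Join varchar with different length varchar
select * from varchar_join1_vc1 a join varchar_join1_vc2 b on (a.c2 = b.c2) order by a.c1;
-- Join varchar with string
select * from varchar_join1_vc1 a join varchar_join1_str b on (a.c2 = b.c2) order by a.c1;

drop table varchar_join1_vc1;
drop table varchar_join1_vc2;
drop table varchar_join1_str;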