diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index e966959..bb716fa 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -603,6 +603,19 @@ minillaplocal.query.files=acid_globallimit.q,\
   vector_auto_smb_mapjoin_14.q,\
   vector_decimal_2.q,\
   vector_decimal_udf.q,\
+  vector_groupby_cube1.q,\
+  vector_groupby_grouping_id1.q,\
+  vector_groupby_grouping_id2.q,\
+  vector_groupby_grouping_id3.q,\
+  vector_groupby_grouping_sets1.q,\
+  vector_groupby_grouping_sets2.q,\
+  vector_groupby_grouping_sets3.q,\
+  vector_groupby_grouping_sets4.q,\
+  vector_groupby_grouping_sets5.q,\
+  vector_groupby_grouping_sets6.q,\
+  vector_groupby_grouping_sets_grouping.q,\
+  vector_groupby_grouping_sets_limit.q,\
+  vector_groupby_grouping_window.q,\
   vector_join30.q,\
   vector_join_filters.q,\
   vector_leftsemi_mapjoin.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
index 935b47b..7ac4f07 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
@@ -20,10 +20,8 @@
 import java.util.Arrays;
 
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
 /**
  * Class to keep information on a set of typed vector columns.  Used by
@@ -64,147 +62,87 @@
    */
   protected int[] intervalDayTimeIndices;
 
-  /**
-   * Helper class for looking up a key value based on key index.
- */ - public class KeyLookupHelper { - public int longIndex; - public int doubleIndex; - public int stringIndex; - public int decimalIndex; - public int timestampIndex; - public int intervalDayTimeIndex; - - private static final int INDEX_UNUSED = -1; - - private void resetIndices() { - this.longIndex = this.doubleIndex = this.stringIndex = this.decimalIndex = - timestampIndex = intervalDayTimeIndex = INDEX_UNUSED; - } - public void setLong(int index) { - resetIndices(); - this.longIndex= index; - } - - public void setDouble(int index) { - resetIndices(); - this.doubleIndex = index; - } - - public void setString(int index) { - resetIndices(); - this.stringIndex = index; - } - - public void setDecimal(int index) { - resetIndices(); - this.decimalIndex = index; - } - - public void setTimestamp(int index) { - resetIndices(); - this.timestampIndex= index; - } - - public void setIntervalDayTime(int index) { - resetIndices(); - this.intervalDayTimeIndex= index; - } - } - - /** - * Lookup vector to map from key index to primitive type index. - */ - protected KeyLookupHelper[] indexLookup; + final protected int keyCount; + private int addKeyIndex; - private int keyCount; - private int addIndex; + private int addLongIndex; + private int addDoubleIndex; + private int addStringIndex; + private int addDecimalIndex; + private int addTimestampIndex; + private int addIntervalDayTimeIndex; - protected int longIndicesIndex; - protected int doubleIndicesIndex; - protected int stringIndicesIndex; - protected int decimalIndicesIndex; - protected int timestampIndicesIndex; - protected int intervalDayTimeIndicesIndex; + // Given the keyIndex these arrays return: + // The ColumnVector.Type, + // The type specific index into longIndices, doubleIndices, etc... + protected ColumnVector.Type[] columnVectorTypes; + protected int[] columnTypeSpecificIndices; protected VectorColumnSetInfo(int keyCount) { this.keyCount = keyCount; - this.addIndex = 0; + this.addKeyIndex = 0; // We'll over allocate and then shrink the array for each type longIndices = new int[this.keyCount]; - longIndicesIndex = 0; + addLongIndex = 0; doubleIndices = new int[this.keyCount]; - doubleIndicesIndex = 0; + addDoubleIndex = 0; stringIndices = new int[this.keyCount]; - stringIndicesIndex = 0; + addStringIndex = 0; decimalIndices = new int[this.keyCount]; - decimalIndicesIndex = 0; + addDecimalIndex = 0; timestampIndices = new int[this.keyCount]; - timestampIndicesIndex = 0; + addTimestampIndex = 0; intervalDayTimeIndices = new int[this.keyCount]; - intervalDayTimeIndicesIndex = 0; - indexLookup = new KeyLookupHelper[this.keyCount]; - } + addIntervalDayTimeIndex = 0; - protected void addKey(String outputType) throws HiveException { - indexLookup[addIndex] = new KeyLookupHelper(); + columnVectorTypes = new ColumnVector.Type[this.keyCount]; + columnTypeSpecificIndices = new int[this.keyCount]; + } - String typeName = VectorizationContext.mapTypeNameSynonyms(outputType); - TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); - Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); + protected void addKey(ColumnVector.Type columnVectorType) throws HiveException { switch (columnVectorType) { case LONG: - longIndices[longIndicesIndex] = addIndex; - indexLookup[addIndex].setLong(longIndicesIndex); - ++longIndicesIndex; + longIndices[addLongIndex] = addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addLongIndex++; break; - case DOUBLE: - doubleIndices[doubleIndicesIndex] = addIndex; - 
indexLookup[addIndex].setDouble(doubleIndicesIndex); - ++doubleIndicesIndex; + doubleIndices[addDoubleIndex] = addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addDoubleIndex++; break; - case BYTES: - stringIndices[stringIndicesIndex]= addIndex; - indexLookup[addIndex].setString(stringIndicesIndex); - ++stringIndicesIndex; + stringIndices[addStringIndex]= addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addStringIndex++; break; - case DECIMAL: - decimalIndices[decimalIndicesIndex]= addIndex; - indexLookup[addIndex].setDecimal(decimalIndicesIndex); - ++decimalIndicesIndex; - break; - + decimalIndices[addDecimalIndex]= addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addDecimalIndex++; + break; case TIMESTAMP: - timestampIndices[timestampIndicesIndex] = addIndex; - indexLookup[addIndex].setTimestamp(timestampIndicesIndex); - ++timestampIndicesIndex; + timestampIndices[addTimestampIndex] = addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addTimestampIndex++; break; - case INTERVAL_DAY_TIME: - intervalDayTimeIndices[intervalDayTimeIndicesIndex] = addIndex; - indexLookup[addIndex].setIntervalDayTime(intervalDayTimeIndicesIndex); - ++intervalDayTimeIndicesIndex; + intervalDayTimeIndices[addIntervalDayTimeIndex] = addKeyIndex; + columnTypeSpecificIndices[addKeyIndex] = addIntervalDayTimeIndex++; break; - default: throw new HiveException("Unexpected column vector type " + columnVectorType); } - addIndex++; + columnVectorTypes[addKeyIndex] = columnVectorType; + addKeyIndex++; } - protected void finishAdding() { - longIndices = Arrays.copyOf(longIndices, longIndicesIndex); - doubleIndices = Arrays.copyOf(doubleIndices, doubleIndicesIndex); - stringIndices = Arrays.copyOf(stringIndices, stringIndicesIndex); - decimalIndices = Arrays.copyOf(decimalIndices, decimalIndicesIndex); - timestampIndices = Arrays.copyOf(timestampIndices, timestampIndicesIndex); - intervalDayTimeIndices = Arrays.copyOf(intervalDayTimeIndices, intervalDayTimeIndicesIndex); + + protected void finishAdding() throws HiveException { + longIndices = Arrays.copyOf(longIndices, addLongIndex); + doubleIndices = Arrays.copyOf(doubleIndices, addDoubleIndex); + stringIndices = Arrays.copyOf(stringIndices, addStringIndex); + decimalIndices = Arrays.copyOf(decimalIndices, addDecimalIndex); + timestampIndices = Arrays.copyOf(timestampIndices, addTimestampIndex); + intervalDayTimeIndices = Arrays.copyOf(intervalDayTimeIndices, addIntervalDayTimeIndex); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 2605203..40a9794 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -22,16 +22,21 @@ import java.lang.management.MemoryMXBean; import java.lang.ref.SoftReference; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.KeyWrapper; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression; +import 
org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; @@ -51,6 +56,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javolution.util.FastBitSet; + import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -107,6 +114,24 @@ private transient VectorAssignRow vectorAssignRow; + /* + * Grouping sets members. + */ + private transient boolean groupingSetsPresent; + + // The field bits (i.e. which fields to include) or "id" for each grouping set. + private transient int[] groupingSets; + + // The position in the column keys of the dummy grouping set id column. + private transient int groupingSetPosition; + + // The planner puts a constant field in for the dummy grouping set id. We will overwrite it + // as we process the grouping sets. + private transient ConstantVectorExpression groupingSetsDummyVectorExpression; + + // We translate the grouping set bit field into a boolean arrays. + private transient boolean[][] allGroupingSetsOverrideIsNulls; + private transient int numEntriesHashTable; private transient long maxHashTblMemory; @@ -141,6 +166,32 @@ public void endGroup() throws HiveException { // Do nothing. } + protected abstract void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, + boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException; + + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { + + if (!groupingSetsPresent) { + doProcessBatch(batch, false, null); + return; + } + + // We drive the doProcessBatch logic with the same batch but different + // grouping set id and null variation. + // PERFORMANCE NOTE: We do not try to reuse columns and generate the KeyWrappers anew... + + final int size = groupingSets.length; + for (int i = 0; i < size; i++) { + + // NOTE: We are overwriting the constant vector value... + groupingSetsDummyVectorExpression.setLongValue(groupingSets[i]); + groupingSetsDummyVectorExpression.evaluate(batch); + + doProcessBatch(batch, (i == 0), allGroupingSetsOverrideIsNulls[i]); + } + } + /** * Evaluates the aggregators on the current batch. * The aggregationBatchInfo must have been prepared @@ -204,7 +255,8 @@ public void initialize(Configuration hconf) throws HiveException { } @Override - public void processBatch(VectorizedRowBatch batch) throws HiveException { + public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, + boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException { for (int i = 0; i < aggregators.length; ++i) { aggregators[i].aggregateInput(aggregationBuffers.getAggregationBuffer(i), batch); } @@ -325,11 +377,24 @@ public void initialize(Configuration hconf) throws HiveException { } @Override - public void processBatch(VectorizedRowBatch batch) throws HiveException { + public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, + boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException { + + if (!groupingSetsPresent || isFirstGroupingSet) { + + // Evaluate the key expressions once. 
+ for(int i = 0; i < keyExpressions.length; ++i) { + keyExpressions[i].evaluate(batch); + } + } // First we traverse the batch to evaluate and prepare the KeyWrappers // After this the KeyWrappers are properly set and hash code is computed - keyWrappersBatch.evaluateBatch(batch); + if (!groupingSetsPresent) { + keyWrappersBatch.evaluateBatch(batch); + } else { + keyWrappersBatch.evaluateBatchGroupingSets(batch, currentGroupingSetsOverrideIsNulls); + } // Next we locate the aggregation buffer set for each key prepareBatchAggregationBufferSets(batch); @@ -604,10 +669,16 @@ public void free(VectorAggregationBufferRow t) { } @Override - public void processBatch(VectorizedRowBatch batch) throws HiveException { + public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, + boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException { + // First we traverse the batch to evaluate and prepare the KeyWrappers // After this the KeyWrappers are properly set and hash code is computed - keyWrappersBatch.evaluateBatch(batch); + if (!groupingSetsPresent) { + keyWrappersBatch.evaluateBatch(batch); + } else { + keyWrappersBatch.evaluateBatchGroupingSets(batch, currentGroupingSetsOverrideIsNulls); + } VectorHashKeyWrapper[] batchKeys = keyWrappersBatch.getVectorHashKeyWrappers(); @@ -699,7 +770,10 @@ public void close(boolean aborted) throws HiveException { @Override public void initialize(Configuration hconf) throws HiveException { inGroup = false; - groupKeyHelper = new VectorGroupKeyHelper(keyExpressions.length); + + // We do not include the dummy grouping set column in the output. So we pass outputKeyLength + // instead of keyExpressions.length + groupKeyHelper = new VectorGroupKeyHelper(outputKeyLength); groupKeyHelper.init(keyExpressions); groupAggregators = allocateAggregationBuffer(); buffer = new DataOutputBuffer(); @@ -722,7 +796,8 @@ public void endGroup() throws HiveException { } @Override - public void processBatch(VectorizedRowBatch batch) throws HiveException { + public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, + boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException { assert(inGroup); if (first) { // Copy the group key to output batch now. We'll copy in the aggregates at the end of the group. @@ -781,6 +856,49 @@ public VectorGroupByOperator(CompilationOpContext ctx) { super(ctx); } + private void setupGroupingSets() { + + groupingSetsPresent = conf.isGroupingSetsPresent(); + if (!groupingSetsPresent) { + groupingSets = null; + groupingSetPosition = -1; + groupingSetsDummyVectorExpression = null; + allGroupingSetsOverrideIsNulls = null; + return; + } + + groupingSets = ArrayUtils.toPrimitive(conf.getListGroupingSets().toArray(new Integer[0])); + groupingSetPosition = conf.getGroupingSetPosition(); + + allGroupingSetsOverrideIsNulls = new boolean[groupingSets.length][]; + + int pos = 0; + for (int groupingSet: groupingSets) { + + // Create the mapping corresponding to the grouping set + + // Assume all columns are null, except the dummy column is always non-null. + boolean[] groupingSetsOverrideIsNull = new boolean[keyExpressions.length]; + Arrays.fill(groupingSetsOverrideIsNull, true); + groupingSetsOverrideIsNull[groupingSetPosition] = false; + + // Add keys of this grouping set. 
+ FastBitSet bitset = GroupByOperator.groupingSet2BitSet(groupingSet); + for (int keyPos = bitset.nextSetBit(0); keyPos >= 0; + keyPos = bitset.nextSetBit(keyPos+1)) { + groupingSetsOverrideIsNull[keyPos] = false; + } + + allGroupingSetsOverrideIsNulls[pos] = groupingSetsOverrideIsNull; + pos++; + } + + // The last key column is the dummy grouping set id. + // + // Figure out which (scratch) column was used so we can overwrite the dummy id. + + groupingSetsDummyVectorExpression = (ConstantVectorExpression) keyExpressions[groupingSetPosition]; + } @Override protected void initializeOp(Configuration hconf) throws HiveException { @@ -834,15 +952,19 @@ protected void initializeOp(Configuration hconf) throws HiveException { forwardCache = new Object[outputKeyLength + aggregators.length]; + setupGroupingSets(); + switch (conf.getVectorDesc().getProcessingMode()) { case GLOBAL: Preconditions.checkState(outputKeyLength == 0); + Preconditions.checkState(!groupingSetsPresent); processingMode = this.new ProcessingModeGlobalAggregate(); break; case HASH: processingMode = this.new ProcessingModeHashAggregate(); break; case MERGE_PARTIAL: + Preconditions.checkState(!groupingSetsPresent); processingMode = this.new ProcessingModeReduceMergePartial(); break; case STREAMING: diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java index 50d0452..0ff389e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java @@ -19,8 +19,12 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.io.IOException; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.DataOutputBuffer; /** @@ -33,9 +37,16 @@ public VectorGroupKeyHelper(int keyCount) { } void init(VectorExpression[] keyExpressions) throws HiveException { + + // NOTE: To support pruning the grouping set id dummy key by VectorGroupbyOpeator MERGE_PARTIAL + // case, we use the keyCount passed to the constructor and not keyExpressions.length. + // Inspect the output type of each key expression. 
- for(int i=0; i < keyExpressions.length; ++i) { - addKey(keyExpressions[i].getOutputType()); + for(int i=0; i < keyCount; ++i) { + String typeName = VectorizationContext.mapTypeNameSynonyms(keyExpressions[i].getOutputType()); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); + addKey(columnVectorType); } finishAdding(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java index 2bd1850..64be5e7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java @@ -57,15 +57,22 @@ private HiveDecimalWritable[] decimalValues; private Timestamp[] timestampValues; + private static Timestamp ZERO_TIMESTAMP = new Timestamp(0); private HiveIntervalDayTime[] intervalDayTimeValues; + private static HiveIntervalDayTime ZERO_INTERVALDAYTIME= new HiveIntervalDayTime(0, 0); + // NOTE: The null array is indexed by keyIndex, which is not available internally. The mapping + // from a long, double, etc index to key index is kept once in the separate + // VectorColumnSetInfo object. private boolean[] isNull; + private int hashcode; public VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount, int byteValuesCount, int decimalValuesCount, int timestampValuesCount, - int intervalDayTimeValuesCount) { + int intervalDayTimeValuesCount, + int keyCount) { longValues = longValuesCount > 0 ? new long[longValuesCount] : EMPTY_LONG_ARRAY; doubleValues = doubleValuesCount > 0 ? new double[doubleValuesCount] : EMPTY_DOUBLE_ARRAY; decimalValues = decimalValuesCount > 0 ? new HiveDecimalWritable[decimalValuesCount] : EMPTY_DECIMAL_ARRAY; @@ -89,8 +96,7 @@ public VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount, for(int i = 0; i < intervalDayTimeValuesCount; ++i) { intervalDayTimeValues[i] = new HiveIntervalDayTime(); } - isNull = new boolean[longValuesCount + doubleValuesCount + byteValuesCount + - decimalValuesCount + timestampValuesCount + intervalDayTimeValuesCount]; + isNull = new boolean[keyCount]; hashcode = 0; } @@ -127,19 +133,14 @@ public void setHashKey() { * Hashing the string is potentially expensive so is better to branch. * Additionally not looking at values for nulls allows us not reset the values. */ - if (!isNull[longValues.length + doubleValues.length + i]) { + if (byteLengths[i] != -1) { byte[] bytes = byteValues[i]; int start = byteStarts[i]; int length = byteLengths[i]; - if (length == bytes.length && start == 0) { - hashcode ^= Arrays.hashCode(bytes); - } - else { - // Unfortunately there is no Arrays.hashCode(byte[], start, length) - for(int j = start; j < start + length; ++j) { - // use 461 as is a (sexy!) prime. - hashcode ^= 461 * bytes[j]; - } + // Unfortunately there is no Arrays.hashCode(byte[], start, length) + for(int j = start; j < start + length; ++j) { + // use 461 as is a (sexy!) prime. 
+ hashcode ^= 461 * bytes[j]; } } } @@ -171,7 +172,7 @@ private boolean bytesEquals(VectorHashKeyWrapper keyThat) { //By the time we enter here the byteValues.lentgh and isNull must have already been compared for (int i = 0; i < byteValues.length; ++i) { // the byte comparison is potentially expensive so is better to branch on null - if (!isNull[longValues.length + doubleValues.length + i]) { + if (byteLengths[i] != -1) { if (!StringExpr.equal( byteValues[i], byteStarts[i], @@ -215,7 +216,7 @@ public void duplicateTo(VectorHashKeyWrapper clone) { for (int i = 0; i < byteValues.length; ++i) { // avoid allocation/copy of nulls, because it potentially expensive. // branch instead. - if (!isNull[longValues.length + doubleValues.length + i]) { + if (byteLengths[i] != -1) { clone.byteValues[i] = Arrays.copyOfRange(byteValues[i], byteStarts[i], byteStarts[i] + byteLengths[i]); } @@ -261,106 +262,138 @@ public void copyKey(KeyWrapper oldWrapper) { throw new UnsupportedOperationException(); } - public void assignDouble(int index, double d) { - doubleValues[index] = d; - isNull[longValues.length + index] = false; + public void assignLong(int index, long v) { + longValues[index] = v; } - public void assignNullDouble(int index) { - doubleValues[index] = 0; // assign 0 to simplify hashcode - isNull[longValues.length + index] = true; + public void assignNullLong(int keyIndex, int index) { + isNull[keyIndex] = true; + longValues[index] = 0; // assign 0 to simplify hashcode } - public void assignLong(int index, long v) { - longValues[index] = v; - isNull[index] = false; + public void assignDouble(int index, double d) { + doubleValues[index] = d; } - public void assignNullLong(int index) { - longValues[index] = 0; // assign 0 to simplify hashcode - isNull[index] = true; + public void assignNullDouble(int keyIndex, int index) { + isNull[keyIndex] = true; + doubleValues[index] = 0; // assign 0 to simplify hashcode } public void assignString(int index, byte[] bytes, int start, int length) { byteValues[index] = bytes; byteStarts[index] = start; byteLengths[index] = length; - isNull[longValues.length + doubleValues.length + index] = false; } - public void assignNullString(int index) { - // We do not assign the value to byteValues[] because the value is never used on null - isNull[longValues.length + doubleValues.length + index] = true; + public void assignNullString(int keyIndex, int index) { + isNull[keyIndex] = true; + // We need some value that indicates NULL. 
+ byteLengths[index] = -1; } public void assignDecimal(int index, HiveDecimalWritable value) { decimalValues[index].set(value); - isNull[longValues.length + doubleValues.length + byteValues.length + index] = false; } - public void assignNullDecimal(int index) { - isNull[longValues.length + doubleValues.length + byteValues.length + index] = true; + public void assignNullDecimal(int keyIndex, int index) { + isNull[keyIndex] = true; + decimalValues[index].set(HiveDecimal.ZERO); // assign 0 to simplify hashcode } public void assignTimestamp(int index, Timestamp value) { timestampValues[index] = value; - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + index] = false; } public void assignTimestamp(int index, TimestampColumnVector colVector, int elementNum) { colVector.timestampUpdate(timestampValues[index], elementNum); - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + index] = false; } - public void assignNullTimestamp(int index) { - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + index] = true; + public void assignNullTimestamp(int keyIndex, int index) { + isNull[keyIndex] = true; + timestampValues[index] = ZERO_TIMESTAMP; // assign 0 to simplify hashcode } public void assignIntervalDayTime(int index, HiveIntervalDayTime value) { intervalDayTimeValues[index].set(value); - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + timestampValues.length + index] = false; } public void assignIntervalDayTime(int index, IntervalDayTimeColumnVector colVector, int elementNum) { intervalDayTimeValues[index].set(colVector.asScratchIntervalDayTime(elementNum)); - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + timestampValues.length + index] = false; } - public void assignNullIntervalDayTime(int index) { - isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + timestampValues.length + index] = true; + public void assignNullIntervalDayTime(int keyIndex, int index) { + isNull[keyIndex] = true; + intervalDayTimeValues[index] = ZERO_INTERVALDAYTIME; // assign 0 to simplify hashcode } @Override public String toString() { - return String.format("%d[%s] %d[%s] %d[%s] %d[%s] %d[%s] %d[%s]", - longValues.length, Arrays.toString(longValues), - doubleValues.length, Arrays.toString(doubleValues), - byteValues.length, Arrays.toString(byteValues), - decimalValues.length, Arrays.toString(decimalValues), - timestampValues.length, Arrays.toString(timestampValues), - intervalDayTimeValues.length, Arrays.toString(intervalDayTimeValues)); - } - - public boolean getIsLongNull(int i) { - return isNull[i]; - } + StringBuilder sb = new StringBuilder(); + boolean isFirst = true; + if (longValues.length > 0) { + isFirst = false; + sb.append("longs "); + sb.append(Arrays.toString(longValues)); + } + if (doubleValues.length > 0) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("doubles "); + sb.append(Arrays.toString(doubleValues)); + } + if (byteValues.length > 0) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("byte lengths "); + sb.append(Arrays.toString(byteLengths)); + } + if (decimalValues.length > 0) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("decimals "); + sb.append(Arrays.toString(decimalValues)); + } + if (timestampValues.length > 0) { + if (isFirst) { + 
isFirst = false; + } else { + sb.append(", "); + } + sb.append("timestamps "); + sb.append(Arrays.toString(timestampValues)); + } + if (intervalDayTimeValues.length > 0) { + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("interval day times "); + sb.append(Arrays.toString(intervalDayTimeValues)); + } - public boolean getIsDoubleNull(int i) { - return isNull[longValues.length + i]; - } + if (isFirst) { + isFirst = false; + } else { + sb.append(", "); + } + sb.append("nulls "); + sb.append(Arrays.toString(isNull)); - public boolean getIsBytesNull(int i) { - return isNull[longValues.length + doubleValues.length + i]; + return sb.toString(); } - public long getLongValue(int i) { return longValues[i]; } @@ -390,30 +423,23 @@ public int getVariableSize() { return variableSize; } - public boolean getIsDecimalNull(int i) { - return isNull[longValues.length + doubleValues.length + byteValues.length + i]; - } - public HiveDecimalWritable getDecimal(int i) { return decimalValues[i]; } - public boolean getIsTimestampNull(int i) { - return isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + i]; - } - public Timestamp getTimestamp(int i) { return timestampValues[i]; } - public boolean getIsIntervalDayTimeNull(int i) { - return isNull[longValues.length + doubleValues.length + byteValues.length + - decimalValues.length + timestampValues.length + i]; - } - public HiveIntervalDayTime getIntervalDayTime(int i) { return intervalDayTimeValues[i]; } -} + public void clearIsNull() { + Arrays.fill(isNull, false); + } + + public boolean isNull(int keyIndex) { + return isNull[keyIndex]; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java index b4708b5..c23d437 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java @@ -23,6 +23,9 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; /** * Class for handling vectorized hash map key wrappers. It evaluates the key columns in a @@ -85,12 +88,168 @@ public int getKeysFixedSize() { * @throws HiveException */ public void evaluateBatch(VectorizedRowBatch batch) throws HiveException { - for(int i = 0; i < keyExpressions.length; ++i) { - keyExpressions[i].evaluate(batch); + + for(int i=0;i= 0) { - return kw.getIsLongNull(klh.longIndex) ? null : - keyOutputWriter.writeValue(kw.getLongValue(klh.longIndex)); - } else if (klh.doubleIndex >= 0) { - return kw.getIsDoubleNull(klh.doubleIndex) ? null : - keyOutputWriter.writeValue(kw.getDoubleValue(klh.doubleIndex)); - } else if (klh.stringIndex >= 0) { - return kw.getIsBytesNull(klh.stringIndex) ? null : - keyOutputWriter.writeValue( - kw.getBytes(klh.stringIndex), - kw.getByteStart(klh.stringIndex), - kw.getByteLength(klh.stringIndex)); - } else if (klh.decimalIndex >= 0) { - return kw.getIsDecimalNull(klh.decimalIndex)? null : - keyOutputWriter.writeValue( - kw.getDecimal(klh.decimalIndex)); - } else if (klh.timestampIndex >= 0) { - return kw.getIsTimestampNull(klh.timestampIndex)? 
null : - keyOutputWriter.writeValue( - kw.getTimestamp(klh.timestampIndex)); - } else if (klh.intervalDayTimeIndex >= 0) { - return kw.getIsIntervalDayTimeNull(klh.intervalDayTimeIndex)? null : - keyOutputWriter.writeValue( - kw.getIntervalDayTime(klh.intervalDayTimeIndex)); - } else { - throw new HiveException(String.format( - "Internal inconsistent KeyLookupHelper at index [%d]:%d %d %d %d %d %d", - i, klh.longIndex, klh.doubleIndex, klh.stringIndex, klh.decimalIndex, - klh.timestampIndex, klh.intervalDayTimeIndex)); + if (kw.isNull(keyIndex)) { + return null; + } + + ColumnVector.Type columnVectorType = columnVectorTypes[keyIndex]; + int columnTypeSpecificIndex = columnTypeSpecificIndices[keyIndex]; + + switch (columnVectorType) { + case LONG: + return keyOutputWriter.writeValue( + kw.getLongValue(columnTypeSpecificIndex)); + case DOUBLE: + return keyOutputWriter.writeValue( + kw.getDoubleValue(columnTypeSpecificIndex)); + case BYTES: + return keyOutputWriter.writeValue( + kw.getBytes(columnTypeSpecificIndex), + kw.getByteStart(columnTypeSpecificIndex), + kw.getByteLength(columnTypeSpecificIndex)); + case DECIMAL: + return keyOutputWriter.writeValue( + kw.getDecimal(columnTypeSpecificIndex)); + case TIMESTAMP: + return keyOutputWriter.writeValue( + kw.getTimestamp(columnTypeSpecificIndex)); + case INTERVAL_DAY_TIME: + return keyOutputWriter.writeValue( + kw.getIntervalDayTime(columnTypeSpecificIndex)); + default: + throw new HiveException("Unexpected column vector type " + columnVectorType); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index e3d9d7f..cba0424 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1622,14 +1622,6 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, bo GroupByDesc desc = op.getConf(); VectorGroupByDesc vectorDesc = desc.getVectorDesc(); - if (desc.isGroupingSetsPresent()) { - LOG.info("Grouping sets not supported in vector mode"); - return false; - } - if (desc.pruneGroupingSetId()) { - LOG.info("Pruning grouping set id not supported in vector mode"); - return false; - } if (desc.getMode() != GroupByDesc.Mode.HASH && desc.isDistinct()) { LOG.info("DISTINCT not supported in vector mode"); return false; @@ -1940,6 +1932,10 @@ private boolean validateAggregationIsPrimitive(VectorAggregateExpression vectorA LOG.info("Cannot vectorize groupby aggregate expression: UDF " + udfName + " not supported"); return new Pair(false, false); } + if (aggDesc.getDistinct()) { + LOG.info("Cannot vectorize groupby aggregate expression: DISTINCT"); + return new Pair(false, false); + } if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters())) { LOG.info("Cannot vectorize groupby aggregate expression: UDF parameters not supported"); return new Pair(false, false); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java index 99791e5..113de86 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java @@ -265,6 +265,11 @@ public boolean isDistinctLike() { return true; } + @Explain(displayName = "grouping sets") + public List getDisplayGroupingSets() { + return (groupingSetsPresent ? 
listGroupingSets : null); + } + // Consider a query like: // select a, b, count(distinct c) from T group by a,b with rollup; // Assume that hive.map.aggr is set to true and hive.groupby.skewindata is false, diff --git ql/src/test/queries/clientpositive/groupby_grouping_id1.q ql/src/test/queries/clientpositive/groupby_grouping_id1.q index d43ea37..9948ce9 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_id1.q +++ ql/src/test/queries/clientpositive/groupby_grouping_id1.q @@ -2,6 +2,8 @@ CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +-- SORT_QUERY_RESULTS + SELECT key, val, GROUPING__ID from T1 group by key, val with cube; SELECT key, val, GROUPING__ID from T1 group by cube(key, val); diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets1.q ql/src/test/queries/clientpositive/groupby_grouping_sets1.q index e239a87..4fcfd3b 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets1.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets1.q @@ -1,3 +1,6 @@ + +-- SORT_QUERY_RESULTS + CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets2.q ql/src/test/queries/clientpositive/groupby_grouping_sets2.q index b470964..af5bbe6 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets2.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets2.q @@ -1,6 +1,8 @@ set hive.mapred.mode=nonstrict; set hive.new.job.grouping.set.cardinality=2; +-- SORT_QUERY_RESULTS + CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets3.q ql/src/test/queries/clientpositive/groupby_grouping_sets3.q index 3c1a5e7..ef4a7aa 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets3.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets3.q @@ -1,3 +1,6 @@ + +-- SORT_QUERY_RESULTS + -- In this test, 2 files are loaded into table T1. The data contains rows with the same value of a and b, -- with different number of rows for a and b in each file. 
Since bucketizedHiveInputFormat is used, -- this tests that the aggregate function stores the partial aggregate state correctly even if an diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets5.q ql/src/test/queries/clientpositive/groupby_grouping_sets5.q index c1c98b3..570d464 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets5.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets5.q @@ -7,6 +7,8 @@ CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMIN LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; +-- SORT_QUERY_RESULTS + -- This tests that cubes and rollups work fine where the source is a sub-query EXPLAIN SELECT a, b, count(*) FROM diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets6.q ql/src/test/queries/clientpositive/groupby_grouping_sets6.q index 5cdb4a5..e537bce 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets6.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets6.q @@ -3,6 +3,8 @@ CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMIN LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; +-- SORT_QUERY_RESULTS + set hive.optimize.ppd = false; -- This filter is not pushed down diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q index 1b753e1..12d2a56 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q @@ -1,3 +1,6 @@ + +-- SORT_QUERY_RESULTS + CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets_limit.q ql/src/test/queries/clientpositive/groupby_grouping_sets_limit.q index db88d5f..b6c5143 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets_limit.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets_limit.q @@ -1,3 +1,6 @@ + +-- SORT_QUERY_RESULTS + CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/vector_groupby_cube1.q ql/src/test/queries/clientpositive/vector_groupby_cube1.q new file mode 100644 index 0000000..fd2f0de --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_cube1.q @@ -0,0 +1,55 @@ +set hive.mapred.mode=nonstrict; +set hive.map.aggr=true; +set hive.groupby.skewindata=false; + +-- SORT_QUERY_RESULTS + +CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; + +EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube; +EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY CUBE(key, val); + +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube; + +EXPLAIN +SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube; + +SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube; + +EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube; + +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube; + +set hive.groupby.skewindata=true; + +EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube; + +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube; + +EXPLAIN 
+SELECT key, count(distinct val) FROM T1 GROUP BY key with cube; + +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube; + + +set hive.multigroupby.singlereducer=true; + +CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE; +CREATE TABLE T3(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE; + +EXPLAIN +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube; + + +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube; + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_id1.q ql/src/test/queries/clientpositive/vector_groupby_grouping_id1.q new file mode 100644 index 0000000..2c9bd3d --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_id1.q @@ -0,0 +1,22 @@ +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +CREATE TABLE T1_text(key STRING, val STRING) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- SORT_QUERY_RESULTS + +SELECT key, val, GROUPING__ID from T1 group by key, val with cube; +SELECT key, val, GROUPING__ID from T1 group by cube(key, val); + +SELECT GROUPING__ID, key, val from T1 group by key, val with rollup; +SELECT GROUPING__ID, key, val from T1 group by rollup (key, val); + +SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube; +SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by cube(key, val); + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_id2.q ql/src/test/queries/clientpositive/vector_groupby_grouping_id2.q new file mode 100644 index 0000000..ebebc2e --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_id2.q @@ -0,0 +1,64 @@ +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +set hive.groupby.skewindata = true; + +-- SORT_QUERY_RESULTS + +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP; +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP (key, value); + +SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID; + +SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP(key, value) +) t +GROUP BY GROUPING__ID; + + +SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID; + +SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT 
GROUPING__ID FROM T1 GROUP BY ROLLUP(key,value)) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key, value)) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID; + + + + + +set hive.groupby.skewindata = false; + +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP; + +SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID; + +SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID; + + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_id3.q ql/src/test/queries/clientpositive/vector_groupby_grouping_id3.q new file mode 100644 index 0000000..29e9211 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_id3.q @@ -0,0 +1,42 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +set hive.cbo.enable = false; + +-- SORT_QUERY_RESULTS + +EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; + +set hive.cbo.enable = true; + +EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets1.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets1.q new file mode 100644 index 0000000..f8561fa --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets1.q @@ -0,0 +1,29 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +-- SORT_QUERY_RESULTS + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +SELECT * FROM T1; + +SELECT a, b, count(*) from T1 group by a, b with cube; +SELECT a, b, count(*) from T1 group by cube(a, b); + +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()); + +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)); + +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c); + +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)); + +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b); + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets2.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets2.q new file mode 100644 index 0000000..158612c --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets2.q @@ -0,0 +1,36 @@ +set 
hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.new.job.grouping.set.cardinality=2; + +-- SORT_QUERY_RESULTS + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- Since 4 grouping sets would be generated for the query below, an additional MR job should be created +EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube; + +EXPLAIN +SELECT a, b, count(*) from T1 group by cube(a, b); +SELECT a, b, count(*) from T1 group by a, b with cube; + +EXPLAIN +SELECT a, b, sum(c) from T1 group by a, b with cube; +SELECT a, b, sum(c) from T1 group by a, b with cube; + +CREATE TABLE T2(a STRING, b STRING, c int, d int) STORED AS ORC; + +INSERT OVERWRITE TABLE T2 +SELECT a, b, c, c from T1; + +EXPLAIN +SELECT a, b, sum(c+d) from T2 group by a, b with cube; +SELECT a, b, sum(c+d) from T2 group by a, b with cube; diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets3.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets3.q new file mode 100644 index 0000000..d299279 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets3.q @@ -0,0 +1,42 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.new.job.grouping.set.cardinality=2; + +-- SORT_QUERY_RESULTS + +-- In this test, 2 files are loaded into table T1. The data contains rows with the same value of a and b, +-- with different number of rows for a and b in each file. Since bucketizedHiveInputFormat is used, +-- this tests that the aggregate function stores the partial aggregate state correctly even if an +-- additional MR job is created for processing the grouping sets. +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets1.txt' INTO TABLE T1_text; +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets2.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +set hive.new.job.grouping.set.cardinality = 30; + +-- The query below will execute in a single MR job, since 4 rows are generated per input row +-- (cube of a,b will lead to (a,b), (a, null), (null, b) and (null, null) and +-- hive.new.job.grouping.set.cardinality is more than 4. +EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube; + +EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by cube(a, b); +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube; + +set hive.new.job.grouping.set.cardinality=2; + +-- The query below will execute in 2 MR jobs, since hive.new.job.grouping.set.cardinality is set to 2. +-- The partial aggregation state should be maintained correctly across MR jobs. 
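Note on the comment above: it states the correctness requirement for the two-job grouping set plans. The first job emits partial aggregates per (a, b) group, and the follow-up job that expands the grouping sets must merge those partials rather than recompute from raw rows or average already-computed averages. A minimal sketch of a partial AVG state that satisfies this (illustrative names, not Hive's GenericUDAFAverage internals):

    // Partial state for AVG: a running (sum, count) pair that later stages merge.
    final class AvgPartial {
        double sum;
        long count;

        void iterate(double value) { sum += value; count++; }                    // map-side accumulation

        void merge(AvgPartial other) { sum += other.sum; count += other.count; } // cross-job merge of partials

        double terminate() { return count == 0 ? 0d : sum / count; }             // final AVG
    }
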
+EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube; +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube; + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets4.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets4.q new file mode 100644 index 0000000..ef0d832 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets4.q @@ -0,0 +1,57 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.merge.mapfiles = false; +set hive.merge.mapredfiles = false; + +-- SORT_QUERY_RESULTS + +-- Set merging to false above to make the explain more readable + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- This tests that cubes and rollups work fine inside sub-queries. +EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a; + +EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq2 +on subq1.a = subq2.a; + +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a; + +set hive.new.job.grouping.set.cardinality=2; + +-- Since 4 grouping sets would be generated for each sub-query, an additional MR job should be created +-- for each of them +EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a; + +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a; + diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets5.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets5.q new file mode 100644 index 0000000..15be3f3 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets5.q @@ -0,0 +1,39 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.merge.mapfiles = false; +set hive.merge.mapredfiles = false; +-- Set merging to false above to make the explain more readable + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- SORT_QUERY_RESULTS + +-- This tests that cubes and rollups work fine where the source is a sub-query +EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube; + +EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, 
b) subq1 group by cube(a, b); + +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube; + +set hive.new.job.grouping.set.cardinality=2; + +-- Since 4 grouping sets would be generated for the cube, an additional MR job should be created +EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube; + +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube; diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets6.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets6.q new file mode 100644 index 0000000..72c2078 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets6.q @@ -0,0 +1,38 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- SORT_QUERY_RESULTS + +set hive.optimize.ppd = false; + +-- This filter is not pushed down +EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5; + +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5; + +set hive.cbo.enable = true; + +-- This filter is pushed down through aggregate with grouping sets by Calcite +EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5; + +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5; diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q new file mode 100644 index 0000000..7b7c892 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q @@ -0,0 +1,99 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- SORT_QUERY_RESULTS + +explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value); + +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value); + +explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value); + +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value); + +explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1; + +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1; + +explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end; + +select key, value, grouping(key)+grouping(value) as x 
+from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end; + +set hive.cbo.enable=false; + +explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value); + +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value); + +explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value); + +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value); + +explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1; + +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1; + +explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end; + +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end; diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_limit.q ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_limit.q new file mode 100644 index 0000000..00649f7 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_limit.q @@ -0,0 +1,42 @@ +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text; + +-- SORT_QUERY_RESULTS + +EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10; + +SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10; + +EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10; + +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10; + +EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10; + +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10; + +EXPLAIN +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10; + +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10; + +EXPLAIN +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10; + +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10; + +EXPLAIN +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10; + +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10; diff --git ql/src/test/queries/clientpositive/vector_groupby_grouping_window.q ql/src/test/queries/clientpositive/vector_groupby_grouping_window.q new file mode 100644 index 0000000..7d75433 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_grouping_window.q @@ -0,0 +1,20 @@ +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +create table t(category int, live int, comments int) stored as orc; +insert into table t select key, 0, 2 from src tablesample(3 rows); + +explain +select category, max(live) live, 
max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0; + +select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0; diff --git ql/src/test/results/clientpositive/annotate_stats_groupby.q.out ql/src/test/results/clientpositive/annotate_stats_groupby.q.out index a8e4854..d2a7a1e 100644 --- ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -300,6 +300,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -350,6 +351,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -400,6 +402,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -450,6 +453,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 1 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -500,6 +504,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 1, 2 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -550,6 +555,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 1, 2, 0 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -600,6 +606,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -699,6 +706,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -798,6 +806,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -848,6 +857,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE 
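Note on the new "grouping sets: 0, 1, 2, 3" lines in these plans: the integers are the grouping set ids. Consistent with the expected GROUPING__ID outputs later in this patch, bit i of an id is set when grouping key i is present in that grouping set; a small sketch of the encoding, with illustrative names:

    public class GroupingSetIdSketch {
        static int groupingSetId(boolean[] keyPresent) {
            int id = 0;
            for (int i = 0; i < keyPresent.length; i++) {
                if (keyPresent[i]) {
                    id |= 1 << i;             // key i participates in this grouping set
                }
            }
            return id;
        }

        public static void main(String[] args) {
            // cube(state, locid) -> ids 3, 1, 2, 0, i.e. the plan's "grouping sets: 0, 1, 2, 3";
            // rollup(state, locid) lists only 0, 1, 3.
            System.out.println(groupingSetId(new boolean[] { true, true }));   // 3
            System.out.println(groupingSetId(new boolean[] { true, false }));  // 1
            System.out.println(groupingSetId(new boolean[] { false, true }));  // 2
            System.out.println(groupingSetId(new boolean[] { false, false })); // 0
        }
    }
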
Group By Operator + grouping sets: 0, 1, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -898,6 +908,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -948,6 +959,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 1 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -998,6 +1010,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 1, 2 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1048,6 +1061,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 1, 2, 0 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1098,6 +1112,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1197,6 +1212,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out index 31c4ed1..74c4d53 100644 --- ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out +++ ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out @@ -103,6 +103,7 @@ STAGE PLANS: outputColumnNames: state, country Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), country (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -251,6 +252,7 @@ STAGE PLANS: outputColumnNames: state, country Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), country (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -350,6 +352,7 @@ STAGE PLANS: outputColumnNames: state, country Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), country (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out index f260f03..b87a9be 100644 --- ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out +++ ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out @@ -300,6 +300,7 @@ 
STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -353,6 +354,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -406,6 +408,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -459,6 +462,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 1 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -512,6 +516,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 1, 2 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -565,6 +570,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 2 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -618,6 +624,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -720,6 +727,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -822,6 +830,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -875,6 +884,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -928,6 +938,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -981,6 +992,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 1 keys: state 
(type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1034,6 +1046,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 1, 2 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1087,6 +1100,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1140,6 +1154,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1242,6 +1257,7 @@ STAGE PLANS: outputColumnNames: state, locid Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: state (type: string), locid (type: int), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/groupby_cube1.q.out ql/src/test/results/clientpositive/groupby_cube1.q.out index 0486b68..6afc9e3 100644 --- ql/src/test/results/clientpositive/groupby_cube1.q.out +++ ql/src/test/results/clientpositive/groupby_cube1.q.out @@ -37,6 +37,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 0, 1, 2, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -96,6 +97,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 0, 1, 2, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -181,6 +183,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 0, 1, 2, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -265,6 +268,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT val) + grouping sets: 0, 1 keys: key (type: string), 0 (type: int), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -338,6 +342,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 0, 1, 2, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -448,6 +453,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT val) + grouping sets: 0, 1 keys: key (type: string), 0 (type: int), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -570,6 +576,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: 
count(1) + grouping sets: 0, 1, 2, 3 keys: key (type: string), val (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -586,6 +593,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(1) + grouping sets: 0, 1, 2, 3 keys: key (type: string), val (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out index f6e1b17..f720f10 100644 --- ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out +++ ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out @@ -50,6 +50,7 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: key (type: string), value (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -64,6 +65,7 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 1, 3 keys: key (type: string), value (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/groupby_grouping_id1.q.out ql/src/test/results/clientpositive/groupby_grouping_id1.q.out index 9ef7615..e390535 100644 --- ql/src/test/results/clientpositive/groupby_grouping_id1.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_id1.q.out @@ -22,24 +22,24 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by key, val with cu POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 -NULL 11 2 -NULL 12 2 -NULL 13 2 -NULL 17 2 -NULL 18 2 -NULL 28 2 -1 NULL 1 1 11 3 -2 NULL 1 +1 NULL 1 2 12 3 -3 NULL 1 +2 NULL 1 3 13 3 -7 NULL 1 +3 NULL 1 7 17 3 -8 NULL 1 +7 NULL 1 8 18 3 8 28 3 +8 NULL 1 +NULL 11 2 +NULL 12 2 +NULL 13 2 +NULL 17 2 +NULL 18 2 +NULL 28 2 +NULL NULL 0 PREHOOK: query: SELECT key, val, GROUPING__ID from T1 group by cube(key, val) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -48,24 +48,24 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by cube(key, val) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 -NULL 11 2 -NULL 12 2 -NULL 13 2 -NULL 17 2 -NULL 18 2 -NULL 28 2 -1 NULL 1 1 11 3 -2 NULL 1 +1 NULL 1 2 12 3 -3 NULL 1 +2 NULL 1 3 13 3 -7 NULL 1 +3 NULL 1 7 17 3 -8 NULL 1 +7 NULL 1 8 18 3 8 28 3 +8 NULL 1 +NULL 11 2 +NULL 12 2 +NULL 13 2 +NULL 17 2 +NULL 18 2 +NULL 28 2 +NULL NULL 0 PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with rollup PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -76,14 +76,14 @@ POSTHOOK: Input: default@t1 #### A masked pattern was here #### 0 NULL NULL 1 1 NULL -3 1 11 1 2 NULL -3 2 12 1 3 NULL -3 3 13 1 7 NULL -3 7 17 1 8 NULL +3 1 11 +3 2 12 +3 3 13 +3 7 17 3 8 18 3 8 28 PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by rollup (key, val) @@ -96,14 +96,14 @@ POSTHOOK: Input: default@t1 #### A masked pattern was here #### 0 NULL NULL 1 1 NULL -3 1 11 1 2 NULL -3 2 12 1 3 NULL -3 3 13 1 7 NULL -3 7 17 1 8 NULL +3 1 11 +3 2 12 +3 3 13 +3 7 17 3 8 18 3 8 28 PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN 
GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube @@ -114,24 +114,24 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 -NULL 11 2 2 -NULL 12 2 2 -NULL 13 2 2 -NULL 17 2 2 -NULL 18 2 2 -NULL 28 2 2 -1 NULL 1 1 1 11 3 3 -2 NULL 1 1 +1 NULL 1 1 2 12 3 3 -3 NULL 1 1 +2 NULL 1 1 3 13 3 3 -7 NULL 1 1 +3 NULL 1 1 7 17 3 3 -8 NULL 1 1 +7 NULL 1 1 8 18 3 3 8 28 3 3 +8 NULL 1 1 +NULL 11 2 2 +NULL 12 2 2 +NULL 13 2 2 +NULL 17 2 2 +NULL 18 2 2 +NULL 28 2 2 +NULL NULL 0 0 PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by cube(key, val) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -140,21 +140,21 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 -NULL 11 2 2 -NULL 12 2 2 -NULL 13 2 2 -NULL 17 2 2 -NULL 18 2 2 -NULL 28 2 2 -1 NULL 1 1 1 11 3 3 -2 NULL 1 1 +1 NULL 1 1 2 12 3 3 -3 NULL 1 1 +2 NULL 1 1 3 13 3 3 -7 NULL 1 1 +3 NULL 1 1 7 17 3 3 -8 NULL 1 1 +7 NULL 1 1 8 18 3 3 8 28 3 3 +8 NULL 1 1 +NULL 11 2 2 +NULL 12 2 2 +NULL 13 2 2 +NULL 17 2 2 +NULL 18 2 2 +NULL 28 2 2 +NULL NULL 0 0 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets1.q.out ql/src/test/results/clientpositive/groupby_grouping_sets1.q.out index aebba0d..c685313 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets1.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets1.q.out @@ -22,12 +22,12 @@ POSTHOOK: query: SELECT * FROM T1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -8 1 1 -5 2 2 1 1 3 2 2 4 2 3 5 3 2 8 +5 2 2 +8 1 1 PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -36,21 +36,21 @@ POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: SELECT a, b, count(*) from T1 group by cube(a, b) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -59,21 +59,21 @@ POSTHOOK: query: SELECT a, b, count(*) from T1 group by cube(a, b) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -82,21 +82,21 @@ POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: SELECT 
a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -105,17 +105,17 @@ POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, ( POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 PREHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -124,6 +124,11 @@ POSTHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### +1 +2 +3 +5 +8 NULL NULL NULL @@ -133,11 +138,6 @@ NULL NULL NULL NULL -1 -2 -3 -5 -8 PREHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) PREHOOK: type: QUERY PREHOOK: Input: default@t1 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out index b4f8ce7..4cd6d72 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out @@ -51,6 +51,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 @@ -135,6 +136,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 @@ -190,21 +192,21 @@ POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: EXPLAIN SELECT a, b, sum(c) from T1 group by a, b with cube PREHOOK: type: QUERY @@ -242,6 +244,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + grouping sets: 0, 1, 2, 3 keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 @@ -297,21 +300,21 @@ POSTHOOK: query: SELECT a, b, sum(c) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 23.0 -NULL 1 4.0 -NULL 2 14.0 -NULL 3 5.0 -1 NULL 3.0 1 1 3.0 -2 NULL 9.0 +1 NULL 3.0 2 2 4.0 2 3 5.0 -3 NULL 8.0 +2 NULL 9.0 3 2 8.0 -5 NULL 2.0 +3 NULL 8.0 5 2 2.0 -8 NULL 1.0 +5 NULL 2.0 8 1 1.0 +8 NULL 1.0 +NULL 1 4.0 +NULL 2 14.0 +NULL 3 5.0 +NULL NULL 23.0 PREHOOK: query: CREATE TABLE T2(a STRING, b STRING, c int, d int) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -371,6 +374,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + grouping sets: 0, 1, 2, 3 keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 @@ -426,18 +430,18 @@ POSTHOOK: query: SELECT a, b, sum(c+d) from T2 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 #### A masked pattern was 
here #### -NULL NULL 46 -NULL 1 8 -NULL 2 28 -NULL 3 10 -1 NULL 6 1 1 6 -2 NULL 18 +1 NULL 6 2 2 8 2 3 10 -3 NULL 16 +2 NULL 18 3 2 16 -5 NULL 4 +3 NULL 16 5 2 4 -8 NULL 2 +5 NULL 4 8 1 2 +8 NULL 2 +NULL 1 8 +NULL 2 28 +NULL 3 10 +NULL NULL 46 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out index 67cbdcd..5e42b82 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out @@ -45,6 +45,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(c), count() + grouping sets: 0, 1, 2, 3 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -104,6 +105,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(c), count() + grouping sets: 0, 1, 2, 3 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -148,22 +150,22 @@ POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 3.8333333333333335 12 -NULL 1 2.0 5 -NULL 2 5.2 5 -NULL 3 5.0 2 -1 NULL 2.6666666666666665 3 1 1 3.0 2 1 2 2.0 1 -2 NULL 5.2 5 +1 NULL 2.6666666666666665 3 2 2 5.333333333333333 3 2 3 5.0 2 -3 NULL 8.0 1 +2 NULL 5.2 5 3 2 8.0 1 -5 NULL 2.0 1 +3 NULL 8.0 1 5 1 2.0 1 -8 NULL 1.0 2 +5 NULL 2.0 1 8 1 1.0 2 +8 NULL 1.0 2 +NULL 1 2.0 5 +NULL 2 5.2 5 +NULL 3 5.0 2 +NULL NULL 3.8333333333333335 12 PREHOOK: query: EXPLAIN SELECT a, b, avg(c), count(*) from T1 group by a, b with cube PREHOOK: type: QUERY @@ -201,6 +203,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), count(VALUE._col1) + grouping sets: 0, 1, 2, 3 keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -256,19 +259,19 @@ POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 3.8333333333333335 12 -NULL 1 2.0 5 -NULL 2 5.2 5 -NULL 3 5.0 2 -1 NULL 2.6666666666666665 3 1 1 3.0 2 1 2 2.0 1 -2 NULL 5.2 5 +1 NULL 2.6666666666666665 3 2 2 5.333333333333333 3 2 3 5.0 2 -3 NULL 8.0 1 +2 NULL 5.2 5 3 2 8.0 1 -5 NULL 2.0 1 +3 NULL 8.0 1 5 1 2.0 1 -8 NULL 1.0 2 +5 NULL 2.0 1 8 1 1.0 2 +8 NULL 1.0 2 +NULL 1 2.0 5 +NULL 2 5.2 5 +NULL 3 5.0 2 +NULL NULL 3.8333333333333335 12 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out index 5884b54..a43fd9f 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out @@ -46,6 +46,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 0, 1, 2, 3 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -123,6 +124,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 0, 1, 2, 3 keys: a (type: string), b (type: 
string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -193,6 +195,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 0, 1, 2, 3 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -270,6 +273,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 0, 1, 2, 3 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -384,6 +388,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 @@ -485,6 +490,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out index 166f110..b19aa77 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets5.q.out @@ -56,6 +56,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 0, 1, 2, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -145,6 +146,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 0, 1, 2, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -202,21 +204,21 @@ POSTHOOK: query: SELECT a, b, count(*) FROM POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: EXPLAIN SELECT a, b, count(*) FROM (SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube @@ -284,6 +286,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 @@ -341,18 +344,18 @@ POSTHOOK: query: SELECT a, b, count(*) FROM POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 -8 NULL 1 +5 NULL 1 8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out index 16f0871..6797e4a 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets6.q.out @@ 
-39,6 +39,7 @@ STAGE PLANS: predicate: (UDFToDouble(a) = 5.0) (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 3, 1 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -81,8 +82,8 @@ WHERE res.a=5 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -5 NULL 5 2 +5 NULL PREHOOK: query: EXPLAIN SELECT a, b FROM (SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res @@ -108,6 +109,7 @@ STAGE PLANS: predicate: (UDFToDouble(a) = 5.0) (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 3, 1 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -150,5 +152,5 @@ WHERE res.a=5 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -5 NULL 5 2 +5 NULL diff --git ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out index 62f40cd..f117d4d 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out @@ -40,6 +40,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 3 keys: _col0 (type: int), _col1 (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -85,17 +86,17 @@ group by rollup(key, value) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 0 +1 1 3 1 1 1 NULL 1 0 1 1 NULL 3 1 1 -1 1 3 1 1 -2 NULL 1 0 1 2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 3 NULL 1 0 1 3 NULL 3 1 1 -3 3 3 1 1 -4 NULL 1 0 1 4 5 3 1 1 +4 NULL 1 0 1 +NULL NULL 0 0 0 PREHOOK: query: explain select key, value, `grouping__id`, grouping(key), grouping(value) from T1 @@ -122,6 +123,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: _col0 (type: int), _col1 (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -167,22 +169,22 @@ group by cube(key, value) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 0 -NULL NULL 2 1 0 -NULL 1 2 1 0 -NULL 2 2 1 0 -NULL 3 2 1 0 -NULL 5 2 1 0 +1 1 3 1 1 1 NULL 1 0 1 1 NULL 3 1 1 -1 1 3 1 1 -2 NULL 1 0 1 2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 3 NULL 1 0 1 3 NULL 3 1 1 -3 3 3 1 1 -4 NULL 1 0 1 4 5 3 1 1 +4 NULL 1 0 1 +NULL 1 2 1 0 +NULL 2 2 1 0 +NULL 3 2 1 0 +NULL 5 2 1 0 +NULL NULL 0 0 0 +NULL NULL 2 1 0 PREHOOK: query: explain select key, value from T1 @@ -211,6 +213,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: _col0 (type: int), _col1 (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -261,17 +264,17 @@ having grouping(key) = 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL -NULL 1 -NULL 2 -NULL 3 -NULL 5 -1 NULL 1 1 +1 NULL 2 2 -3 NULL 3 3 +3 NULL 4 5 +NULL 1 +NULL 2 +NULL 3 +NULL 5 +NULL NULL PREHOOK: query: explain select key, value, grouping(key)+grouping(value) as x from T1 @@ 
-303,6 +306,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: _col0 (type: int), _col1 (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -376,21 +380,21 @@ order by x desc, case when x = 1 then key end POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -4 5 2 -3 3 2 -3 NULL 2 -2 2 2 1 1 2 -1 NULL 2 -NULL 1 1 -NULL NULL 1 -NULL 5 1 -NULL 3 1 -NULL 2 1 1 NULL 1 +1 NULL 2 +2 2 2 2 NULL 1 +3 3 2 3 NULL 1 +3 NULL 2 +4 5 2 4 NULL 1 +NULL 1 1 +NULL 2 1 +NULL 3 1 +NULL 5 1 +NULL NULL 1 PREHOOK: query: explain select key, value, `grouping__id`, grouping(key), grouping(value) from T1 @@ -417,6 +421,7 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 3 keys: key (type: int), value (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -462,17 +467,17 @@ group by rollup(key, value) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 0 +1 1 3 1 1 1 NULL 1 0 1 1 NULL 3 1 1 -1 1 3 1 1 -2 NULL 1 0 1 2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 3 NULL 1 0 1 3 NULL 3 1 1 -3 3 3 1 1 -4 NULL 1 0 1 4 5 3 1 1 +4 NULL 1 0 1 +NULL NULL 0 0 0 PREHOOK: query: explain select key, value, `grouping__id`, grouping(key), grouping(value) from T1 @@ -499,6 +504,7 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: key (type: int), value (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -544,22 +550,22 @@ group by cube(key, value) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 0 -NULL NULL 2 1 0 -NULL 1 2 1 0 -NULL 2 2 1 0 -NULL 3 2 1 0 -NULL 5 2 1 0 +1 1 3 1 1 1 NULL 1 0 1 1 NULL 3 1 1 -1 1 3 1 1 -2 NULL 1 0 1 2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 3 NULL 1 0 1 3 NULL 3 1 1 -3 3 3 1 1 -4 NULL 1 0 1 4 5 3 1 1 +4 NULL 1 0 1 +NULL 1 2 1 0 +NULL 2 2 1 0 +NULL 3 2 1 0 +NULL 5 2 1 0 +NULL NULL 0 0 0 +NULL NULL 2 1 0 PREHOOK: query: explain select key, value from T1 @@ -588,6 +594,7 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: key (type: int), value (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -635,17 +642,17 @@ having grouping(key) = 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL -NULL 1 -NULL 2 -NULL 3 -NULL 5 -1 NULL 1 1 +1 NULL 2 2 -3 NULL 3 3 +3 NULL 4 5 +NULL 1 +NULL 2 +NULL 3 +NULL 5 +NULL NULL PREHOOK: query: explain select key, value, grouping(key)+grouping(value) as x from T1 @@ -677,6 +684,7 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1, 2, 3 keys: key (type: int), value (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -750,18 +758,18 @@ order by x desc, case when x = 1 then key end POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -4 5 2 -3 3 2 -3 NULL 2 -2 2 2 1 1 2 -1 NULL 2 -NULL 1 1 -NULL NULL 1 -NULL 5 1 -NULL 3 1 -NULL 2 1 1 NULL 1 +1 NULL 2 +2 
2 2 2 NULL 1 +3 3 2 3 NULL 1 +3 NULL 2 +4 5 2 4 NULL 1 +NULL 1 1 +NULL 2 1 +NULL 3 1 +NULL 5 1 +NULL NULL 1 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets_limit.q.out ql/src/test/results/clientpositive/groupby_grouping_sets_limit.q.out index e2d9d96..997bb5a 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets_limit.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets_limit.q.out @@ -37,6 +37,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 0, 1, 2, 3 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -85,16 +86,16 @@ POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 +2 NULL 2 3 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: EXPLAIN SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10 PREHOOK: type: QUERY @@ -118,6 +119,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 0, 1, 2, 3 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -166,16 +168,16 @@ POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 6 -NULL 1 2 -NULL 2 3 -NULL 3 1 -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 +2 NULL 2 3 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 PREHOOK: query: EXPLAIN SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10 PREHOOK: type: QUERY @@ -199,6 +201,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 3, 1 keys: a (type: string), b (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -247,15 +250,15 @@ POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, ( POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 NULL 1 1 1 1 -2 NULL 2 +1 NULL 1 2 2 1 2 3 1 -3 NULL 1 +2 NULL 2 3 2 1 -5 NULL 1 +3 NULL 1 5 2 1 +5 NULL 1 8 NULL 1 PREHOOK: query: EXPLAIN SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10 @@ -279,6 +282,7 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 1, 2, 4 keys: a (type: string), b (type: string), c (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -325,6 +329,7 @@ POSTHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### +1 NULL NULL NULL @@ -334,7 +339,6 @@ NULL NULL NULL NULL -1 PREHOOK: query: EXPLAIN SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10 PREHOOK: type: QUERY @@ -357,6 +361,7 @@ STAGE PLANS: outputColumnNames: a Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 1 keys: a (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, 
_col1 @@ -427,6 +432,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + grouping sets: 1 keys: _col0 (type: double), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/groupby_grouping_window.q.out ql/src/test/results/clientpositive/groupby_grouping_window.q.out index 251f4f7..ae07e0c 100644 --- ql/src/test/results/clientpositive/groupby_grouping_window.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_window.q.out @@ -49,6 +49,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(live), max(comments) + grouping sets: 0, 1 keys: category (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/groupby_rollup1.q.out ql/src/test/results/clientpositive/groupby_rollup1.q.out index 5fd011e..fd96e18 100644 --- ql/src/test/results/clientpositive/groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/groupby_rollup1.q.out @@ -37,6 +37,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 0, 1, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -116,6 +117,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT val) + grouping sets: 0, 1 keys: key (type: string), 0 (type: int), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -189,6 +191,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 0, 1, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -293,6 +296,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT val) + grouping sets: 0, 1 keys: key (type: string), 0 (type: int), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -415,6 +419,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 0, 1, 3 keys: key (type: string), val (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -431,6 +436,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(1) + grouping sets: 0, 1, 3 keys: key (type: string), val (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out index d740dea..1a890fe 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out @@ -38,6 +38,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 0, 1, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash 
outputColumnNames: _col0, _col1, _col2, _col3 @@ -1470,6 +1471,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 0, 1, 2, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -1648,6 +1650,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) + grouping sets: 1, 2 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/limit_pushdown2.q.out ql/src/test/results/clientpositive/limit_pushdown2.q.out index cdd221b..689701b 100644 --- ql/src/test/results/clientpositive/limit_pushdown2.q.out +++ ql/src/test/results/clientpositive/limit_pushdown2.q.out @@ -937,6 +937,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(_col2) + grouping sets: 0, 1, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -1026,6 +1027,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(_col2) + grouping sets: 0, 1, 3 keys: _col0 (type: string), _col1 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/llap/vector_count.q.out ql/src/test/results/clientpositive/llap/vector_count.q.out index 9ef5c2b..7c71355 100644 --- ql/src/test/results/clientpositive/llap/vector_count.q.out +++ ql/src/test/results/clientpositive/llap/vector_count.q.out @@ -80,7 +80,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: bigint) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap @@ -153,7 +153,7 @@ STAGE PLANS: sort order: ++++ Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap diff --git ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out new file mode 100644 index 0000000..8552139 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out @@ -0,0 +1,781 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM 
T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + grouping sets: 0, 1, 2, 3 + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY CUBE(key, val) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY CUBE(key, val) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + grouping sets: 0, 1, 2, 3 + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 11 1 +1 NULL 1 +2 12 1 +2 NULL 1 +3 13 1 +3 NULL 1 +7 17 1 +7 NULL 1 +8 18 1 +8 28 1 +8 NULL 2 +NULL 11 1 +NULL 12 1 +NULL 13 1 +NULL 17 1 +NULL 18 1 +NULL 28 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + grouping sets: 0, 1, 2, 3 + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, 
_col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 11 3 1 +1 NULL 1 1 +2 12 3 1 +2 NULL 1 1 +3 13 3 1 +3 NULL 1 1 +7 17 3 1 +7 NULL 1 1 +8 18 3 1 +8 28 3 1 +8 NULL 1 2 +NULL 11 2 1 +NULL 12 2 1 +NULL 13 2 1 +NULL 17 2 1 +NULL 18 2 1 +NULL 28 2 1 +NULL NULL 0 6 +PREHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT val) + grouping sets: 0, 1 + keys: key (type: string), 0 (type: int), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 1 +2 1 +3 1 +7 1 +8 2 +NULL 6 +PREHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + grouping sets: 0, 1, 2, 3 + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: 
Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 11 1 +1 NULL 1 +2 12 1 +2 NULL 1 +3 13 1 +3 NULL 1 +7 17 1 +7 NULL 1 +8 18 1 +8 28 1 +8 NULL 2 +NULL 11 1 +NULL 12 1 +NULL 13 1 +NULL 17 1 +NULL 18 1 +NULL 28 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT val) + grouping sets: 0, 1 + keys: key (type: string), 0 (type: int), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: final + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT 
key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 1 +2 1 +3 1 +7 1 +8 2 +NULL 6 +PREHOOK: query: CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: CREATE TABLE T3(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +POSTHOOK: query: CREATE TABLE T3(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: EXPLAIN +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + grouping sets: 0, 1, 2, 3 + keys: key (type: string), val (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(1) + grouping sets: 0, 1, 2, 3 + keys: key (type: string), val (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + 
Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t3 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t3 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t3 +POSTHOOK: query: FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t3 +POSTHOOK: Lineage: t2.key1 SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +POSTHOOK: Lineage: t2.val EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: t3.key1 SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t3.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +POSTHOOK: Lineage: t3.val EXPRESSION [(t1)t1.null, ] diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out new file mode 100644 index 0000000..878c83f --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out @@ -0,0 +1,179 @@ +PREHOOK: query: CREATE TABLE T1_text(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.key SIMPLE [(t1_text)t1_text.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t1.val SIMPLE [(t1_text)t1_text.FieldSchema(name:val, type:string, comment:null), ] +t1_text.key t1_text.val +PREHOOK: query: SELECT key, val, GROUPING__ID from T1 group by key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@t1 +#### A masked pattern was here #### +key val grouping__id +1 11 3 +1 NULL 1 +2 12 3 +2 NULL 1 +3 13 3 +3 NULL 1 +7 17 3 +7 NULL 1 +8 18 3 +8 28 3 +8 NULL 1 +NULL 11 2 +NULL 12 2 +NULL 13 2 +NULL 17 2 +NULL 18 2 +NULL 28 2 +NULL NULL 0 +PREHOOK: query: SELECT key, val, GROUPING__ID from T1 group by cube(key, val) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by cube(key, val) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key val grouping__id +1 11 3 +1 NULL 1 +2 12 3 +2 NULL 1 +3 13 3 +3 NULL 1 +7 17 3 +7 NULL 1 +8 18 3 +8 28 3 +8 NULL 1 +NULL 11 2 +NULL 12 2 +NULL 13 2 +NULL 17 2 +NULL 18 2 +NULL 28 2 +NULL NULL 0 +PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with rollup +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with rollup +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id key val +0 NULL NULL +1 1 NULL +1 2 NULL +1 3 NULL +1 7 NULL +1 8 NULL +3 1 11 +3 2 12 +3 3 13 +3 7 17 +3 8 18 +3 8 28 +PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by rollup (key, val) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, key, val from T1 group by rollup (key, val) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id key val +0 NULL NULL +1 1 NULL +1 2 NULL +1 3 NULL +1 7 NULL +1 8 NULL +3 1 11 +3 2 12 +3 3 13 +3 7 17 +3 8 18 +3 8 28 +PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key val grouping__id _c3 +1 11 3 3 +1 NULL 1 1 +2 12 3 3 +2 NULL 1 1 +3 13 3 3 +3 NULL 1 1 +7 17 3 3 +7 NULL 1 1 +8 18 3 3 +8 28 3 3 +8 NULL 1 1 +NULL 11 2 2 +NULL 12 2 2 +NULL 13 2 2 +NULL 17 2 2 +NULL 18 2 2 +NULL 28 2 2 +NULL NULL 0 0 +PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by cube(key, val) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by cube(key, val) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key val grouping__id _c3 +1 11 3 3 +1 NULL 1 1 +2 12 3 3 +2 NULL 1 1 +3 13 3 3 +3 NULL 1 1 +7 17 3 3 +7 NULL 1 1 +8 18 3 3 +8 28 3 3 +8 NULL 1 1 +NULL 11 2 2 +NULL 12 2 2 +NULL 13 2 2 +NULL 17 2 2 +NULL 18 2 2 +NULL 28 2 2 +NULL NULL 0 0 diff --git 
ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out new file mode 100644 index 0000000..41c6883 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out @@ -0,0 +1,359 @@ +PREHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.key SIMPLE [(t1_text)t1_text.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: t1.value SIMPLE [(t1_text)t1_text.FieldSchema(name:value, type:int, comment:null), ] +t1_text.key t1_text.value +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 +1 1 3 1 +1 NULL 1 2 +1 NULL 3 1 +2 2 3 1 +2 NULL 1 1 +3 3 3 1 +3 NULL 1 2 +3 NULL 3 1 +4 5 3 1 +4 NULL 1 1 +NULL NULL 0 6 +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP (key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP (key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 +1 1 3 1 +1 NULL 1 2 +1 NULL 3 1 +2 2 3 1 +2 NULL 1 1 +3 3 3 1 +3 NULL 1 2 +3 NULL 3 1 +4 5 3 1 +4 NULL 1 1 +NULL NULL 0 6 +PREHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id _c1 +0 1 +1 4 +3 6 +PREHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP(key, value) +) t +GROUP BY GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: 
SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY ROLLUP(key, value) +) t +GROUP BY GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id _c1 +0 1 +1 4 +3 6 +PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +t1.grouping__id t2.grouping__id +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key,value)) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key, value)) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key,value)) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY ROLLUP(key, value)) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +t1.grouping__id t2.grouping__id +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 +1 1 3 1 +1 NULL 1 2 +1 NULL 3 1 +2 2 3 1 +2 NULL 1 1 +3 3 3 1 +3 NULL 1 2 +3 NULL 3 1 +4 5 3 1 +4 NULL 1 1 +NULL NULL 0 6 +PREHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +grouping__id _c1 +0 1 +1 4 +3 6 +PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: 
query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +t1.grouping__id t2.grouping__id +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out new file mode 100644 index 0000000..fd29322 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out @@ -0,0 +1,234 @@ +PREHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.key SIMPLE [(t1_text)t1_text.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: t1.value SIMPLE [(t1_text)t1_text.FieldSchema(name:value, type:int, comment:null), ] +t1_text.key t1_text.value +PREHOOK: query: EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 0, 1 + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2 = 1) 
(type: boolean) + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), 1 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), 1 (type: int) + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), 1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 +1 NULL 1 2 +2 NULL 1 1 +3 NULL 1 2 +4 NULL 1 1 +PREHOOK: query: EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 1, 0 + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + 
Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2 = 1) (type: boolean) + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 +1 NULL 1 2 +2 NULL 1 1 +3 NULL 1 2 +4 NULL 1 1 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out new file mode 100644 index 0000000..871b0e3 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out @@ -0,0 +1,188 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE 
[(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: SELECT * FROM T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +t1.a t1.b t1.c +1 1 3 +2 2 4 +2 3 5 +3 2 8 +5 2 2 +8 1 1 +PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: SELECT a, b, count(*) from T1 group by cube(a, b) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) from T1 group by cube(a, b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +PREHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a +1 +2 +3 +5 +8 +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a +1 +2 +3 +5 +8 +PREHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +_c0 _c1 +2.0 1 +4.0 1 +5.0 2 +7.0 1 +9.0 1 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out 
ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out new file mode 100644 index 0000000..2973aba --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out @@ -0,0 +1,473 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data 
size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by cube(a, b) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by cube(a, b) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution 
mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, sum(c) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, sum(c) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(c) + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + grouping sets: 0, 1, 2, 3 + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col3 (type: double) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, sum(c) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, sum(c) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 3.0 +1 NULL 3.0 +2 2 4.0 +2 3 5.0 +2 NULL 9.0 +3 2 8.0 +3 NULL 8.0 +5 2 2.0 +5 NULL 2.0 +8 1 1.0 +8 NULL 1.0 +NULL 1 4.0 +NULL 2 14.0 +NULL 3 5.0 +NULL NULL 23.0 +PREHOOK: query: CREATE TABLE T2(a STRING, b STRING, c int, d int) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(a STRING, b STRING, c int, d int) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: INSERT OVERWRITE TABLE T2 +SELECT a, b, c, c from T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t2 +POSTHOOK: query: INSERT OVERWRITE TABLE T2 +SELECT a, b, c, c from T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.a SIMPLE [(t1)t1.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t2.b SIMPLE [(t1)t1.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t2.c EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, comment:null), ] +POSTHOOK: Lineage: t2.d EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, comment:null), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: EXPLAIN +SELECT a, b, sum(c+d) from T2 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, sum(c+d) from T2 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), (c + d) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + grouping sets: 0, 1, 2, 3 + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, sum(c+d) from T2 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, sum(c+d) from T2 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +a b _c2 +1 1 6 +1 NULL 6 +2 2 8 +2 3 10 +2 NULL 18 +3 2 16 +3 NULL 16 +5 2 4 +5 NULL 4 +8 1 2 +8 NULL 2 +NULL 1 8 +NULL 2 28 +NULL 3 10 +NULL NULL 46 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out new file mode 100644 index 0000000..15450e9 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out @@ -0,0 +1,317 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets1.txt' INTO TABLE 
T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets1.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets2.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets2.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(c), count() + grouping sets: 0, 1, 2, 3 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 
Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by cube(a, b) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by cube(a, b) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(c), count() + grouping sets: 0, 1, 2, 3 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 _c3 +1 1 3.0 2 +1 2 2.0 1 +1 NULL 2.6666666666666665 3 +2 2 5.333333333333333 3 +2 3 5.0 2 +2 NULL 5.2 5 +3 2 8.0 1 +3 NULL 8.0 1 +5 1 2.0 1 +5 NULL 2.0 1 +8 1 1.0 2 +8 NULL 1.0 
2 +NULL 1 2.0 5 +NULL 2 5.2 5 +NULL 3 5.0 2 +NULL NULL 3.8333333333333335 12 +PREHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(c), count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + grouping sets: 0, 1, 2, 3 + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, 
b, avg(c), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 _c3 +1 1 3.0 2 +1 2 2.0 1 +1 NULL 2.6666666666666665 3 +2 2 5.333333333333333 3 +2 3 5.0 2 +2 NULL 5.2 5 +3 2 8.0 1 +3 NULL 8.0 1 +5 1 2.0 1 +5 NULL 2.0 1 +8 1 1.0 2 +8 NULL 1.0 2 +NULL 1 2.0 5 +NULL 2 5.2 5 +NULL 3 5.0 2 +NULL NULL 3.8333333333333335 12 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out new file mode 100644 index 0000000..d3609a9 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out @@ -0,0 +1,560 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 0, 1, 2, 3 + keys: a (type: string), b (type: 
string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 0, 1, 2, 3 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE 
Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by cube(a, b) ) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 0, 1, 2, 3 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 0, 1, 2, 3 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution 
mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +subq1.a subq1.b subq1._c2 subq2.a subq2.b subq2._c2 +1 1 1 1 1 1 +1 1 1 1 NULL 1 +1 NULL 1 1 1 1 +1 NULL 1 1 NULL 1 +2 2 1 2 2 1 +2 2 1 2 3 1 +2 2 1 2 NULL 2 +2 3 1 2 2 1 +2 3 1 2 3 1 
+2 3 1 2 NULL 2 +2 NULL 2 2 2 1 +2 NULL 2 2 3 1 +2 NULL 2 2 NULL 2 +PREHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) < 3.0) (type: boolean) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + 
aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 1122 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 7 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 1020 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 
where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +subq1.a subq1.b subq1._c2 subq2.a subq2.b subq2._c2 +1 1 1 1 1 1 +1 1 1 1 NULL 1 +1 NULL 1 1 1 1 +1 NULL 1 1 NULL 1 +2 2 1 2 2 1 +2 2 1 2 3 1 +2 2 1 2 NULL 2 +2 3 1 2 2 1 +2 3 1 2 3 1 +2 3 1 2 NULL 2 +2 NULL 2 2 2 1 +2 NULL 2 2 3 1 +2 NULL 2 2 NULL 2 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out new file mode 100644 index 0000000..e283444 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out @@ -0,0 +1,374 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator 
+ keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 0, 1, 2, 3 + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by cube(a, b) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by cube(a, b) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + 
key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 0, 1, 2, 3 + keys: _col0 (type: string), _col1 (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column 
stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + grouping sets: 0, 1, 2, 3 + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) subq1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM +(SELECT a, b, count(1) from T1 group by a, b) 
subq1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 1 1 +8 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out new file mode 100644 index 0000000..45f7eb3 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out @@ -0,0 +1,194 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) = 5.0) (type: boolean) + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 3, 1 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: 
NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b +5 2 +5 NULL +PREHOOK: query: EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(a) = 5.0) (type: boolean) + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 3, 1 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 
+PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM +(SELECT a, b from T1 group by a, b grouping sets ( (a,b),a )) res +WHERE res.a=5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b +5 2 +5 NULL diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out new file mode 100644 index 0000000..c026f3f --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out @@ -0,0 +1,866 @@ +PREHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(key INT, value INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.key SIMPLE [(t1_text)t1_text.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: t1.value SIMPLE [(t1_text)t1_text.FieldSchema(name:value, type:int, comment:null), ] +t1_text.key t1_text.value +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 0, 1, 3 + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: 
vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 _c4 +1 1 3 1 1 +1 NULL 1 0 1 +1 NULL 3 1 1 +2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 +3 NULL 1 0 1 +3 NULL 3 1 1 +4 5 3 1 1 +4 NULL 1 0 1 +NULL NULL 0 0 0 +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 0, 1, 2, 3 + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint) + outputColumnNames: 
_col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 _c4 +1 1 3 1 1 +1 NULL 1 0 1 +1 NULL 3 1 1 +2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 +3 NULL 1 0 1 +3 NULL 3 1 1 +4 5 3 1 1 +4 NULL 1 0 1 +NULL 1 2 1 0 +NULL 2 2 1 0 +NULL 3 2 1 0 +NULL 5 2 1 0 +NULL NULL 0 0 0 +NULL NULL 2 1 0 +PREHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 0, 1, 2, 3 + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToInteger(grouping(_col2, 1)) = 1) (type: boolean) + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value +1 1 +1 NULL +2 2 +3 3 +3 NULL +4 5 +NULL 1 +NULL 2 +NULL 3 +NULL 5 +NULL NULL +PREHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 0, 1, 2, 3 + keys: _col0 (type: int), _col1 (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((UDFToInteger(grouping(_col2, 1)) = 1) or (UDFToInteger(grouping(_col2, 0)) = 1)) (type: boolean) + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) ELSE (null) END (type: int) + sort order: -+ + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), 
KEY.reducesinkkey0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value x +1 1 2 +1 NULL 1 +1 NULL 2 +2 2 2 +2 NULL 1 +3 3 2 +3 NULL 1 +3 NULL 2 +4 5 2 +4 NULL 1 +NULL 1 1 +NULL 2 1 +NULL 3 1 +NULL 5 1 +NULL NULL 1 +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 0, 1, 3 + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by rollup(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 _c4 +1 1 3 1 1 +1 NULL 1 0 1 +1 NULL 3 1 1 +2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 +3 NULL 1 0 1 +3 NULL 3 1 1 +4 5 3 1 1 +4 NULL 1 0 1 +NULL NULL 0 0 0 +PREHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 0, 1, 2, 3 + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### 
+POSTHOOK: query: select key, value, `grouping__id`, grouping(key), grouping(value) +from T1 +group by cube(key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value grouping__id _c3 _c4 +1 1 3 1 1 +1 NULL 1 0 1 +1 NULL 3 1 1 +2 2 3 1 1 +2 NULL 1 0 1 +3 3 3 1 1 +3 NULL 1 0 1 +3 NULL 3 1 1 +4 5 3 1 1 +4 NULL 1 0 1 +NULL 1 2 1 0 +NULL 2 2 1 0 +NULL 3 2 1 0 +NULL 5 2 1 0 +NULL NULL 0 0 0 +NULL NULL 2 1 0 +PREHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 0, 1, 2, 3 + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (grouping(_col2, 1) = 1) (type: boolean) + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from T1 +group by cube(key, value) +having grouping(key) = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value +1 1 +1 NULL +2 2 +3 3 +3 NULL +4 5 +NULL 1 +NULL 2 +NULL 3 +NULL 5 +NULL NULL +PREHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, grouping(key)+grouping(value) as x 
+from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 0, 1, 2, 3 + keys: key (type: int), value (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((grouping(_col2, 1) = 1) or (grouping(_col2, 0) = 1)) (type: boolean) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) END (type: int) + sort order: -+ + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 1 +order by x desc, case when x = 1 then key end +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, grouping(key)+grouping(value) as x +from T1 +group by cube(key, value) +having grouping(key) = 1 OR grouping(value) = 
1 +order by x desc, case when x = 1 then key end +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +key value x +1 1 2 +1 NULL 1 +1 NULL 2 +2 2 2 +2 NULL 1 +3 3 2 +3 NULL 1 +3 NULL 2 +4 5 2 +4 NULL 1 +NULL 1 1 +NULL 2 1 +NULL 3 1 +NULL 5 1 +NULL NULL 1 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out new file mode 100644 index 0000000..154ce88 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out @@ -0,0 +1,574 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 0, 1, 2, 3 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 
Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2550 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 2550 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 0, 1, 2, 3 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, 
_col1, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2550 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 2550 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 NULL 1 +NULL 1 2 +NULL 2 3 +NULL 3 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: a, b + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 3, 1 + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 6 Data 
size: 1530 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 +1 1 1 +1 NULL 1 +2 2 1 +2 3 1 +2 NULL 2 +3 2 1 +3 NULL 1 +5 2 1 +5 NULL 1 +8 NULL 1 +PREHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string), c (type: string) + outputColumnNames: a, b, c + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 1, 2, 4 + keys: a (type: string), b (type: string), c (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 4590 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) + Statistics: Num rows: 18 Data size: 4590 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a FROM T1 
GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a +1 +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string) + outputColumnNames: a + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + grouping sets: 1 + keys: a (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Limit + Number of rows: 10 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a +1 +2 +3 +5 +8 +PREHOOK: query: EXPLAIN +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: (UDFToDouble(a) + UDFToDouble(b)) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + grouping sets: 1 + keys: _col0 (type: double), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: double), _col1 (type: int) + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: double), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col2 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: double), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b) LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +_c0 _c1 +2.0 1 +4.0 1 +5.0 2 +7.0 1 +9.0 1 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out new file mode 100644 index 0000000..333f071 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out @@ -0,0 +1,158 @@ +PREHOOK: query: create table t(category int, live int, comments int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t(category int, live int, comments int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: insert into table t select key, 0, 2 from src tablesample(3 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t +POSTHOOK: query: insert into table t select key, 0, 2 from src tablesample(3 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.category EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t.comments SIMPLE [] +POSTHOOK: Lineage: t.live SIMPLE [] +_col0 _col1 _col2 +PREHOOK: query: explain +select category, max(live) live, max(comments) 
comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: category (type: int), live (type: int), comments (type: int) + outputColumnNames: category, live, comments + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(live), max(comments) + grouping sets: 0, 1 + keys: category (type: int), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), max(VALUE._col1) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Filter Operator + predicate: (_col3 > 0) (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col2: int, _col3: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col3 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col3 + name: rank + window function: GenericUDAFRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, 
_col3 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select category, max(live) live, max(comments) comments, rank() OVER (PARTITION BY category ORDER BY comments) rank1 +FROM t +GROUP BY category +GROUPING SETS ((), (category)) +HAVING max(comments) > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +category live comments rank1 +NULL 0 2 1 +86 0 2 1 +238 0 2 1 +311 0 2 1 diff --git ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out index 77a0695..299c164 100644 --- ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out @@ -155,6 +155,7 @@ STAGE PLANS: outputColumnNames: s_store_id Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1 keys: s_store_id (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -164,10 +165,10 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int) @@ -240,6 +241,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1 keys: _col0 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -249,7 +251,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 Execution mode: vectorized, llap @@ -328,6 +330,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1 keys: _col0 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -337,7 +340,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorization_15.q.out ql/src/test/results/clientpositive/llap/vectorization_15.q.out index 
04cd902..7fdb8e9 100644 --- ql/src/test/results/clientpositive/llap/vectorization_15.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_15.q.out @@ -215,15 +215,15 @@ POSTHOOK: Input: default@alltypesorc -51.0 true NULL QiOcvR0kt6r7f0R7fiPxQTCU -51 266531954 1969-12-31 16:00:08.451 0.0 -266531980.28 NULL NULL 33.0 0.0 0.0 NULL 51 0.0 2.66532E8 -23 266531980.28 0.0 -51.0 true NULL Ybpj38RTTYl7CnJXPNx1g4C -51 -370919370 1969-12-31 16:00:08.451 0.0 370919343.72 NULL NULL 33.0 0.0 0.0 NULL 51 0.0 -3.70919296E8 -23 -370919343.72 0.0 -6.0 NULL -200.0 NULL -6 NULL 1969-12-31 15:59:56.094 0.0 NULL -200.0 -15910.599999999999 3.0 0.0 0.0 -23.0 6 NULL NULL -5 NULL NULL --62.0 NULL 15601.0 NULL -62 NULL 1969-12-31 16:00:09.889 0.0 NULL 15601.0 1241106.353 33.0 0.0 0.0 -23.0 62 NULL NULL -23 NULL NULL +-62.0 NULL 15601.0 NULL -62 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 33.0 0.0 0.0 -23.0 62 NULL NULL -23 NULL NULL 11.0 false NULL 10pO8p1LNx4Y 11 271296824 1969-12-31 16:00:02.351 0.0 -271296850.28 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 2.71296832E8 -1 271296850.28 0.0 11.0 false NULL 1H6wGP 11 -560827082 1969-12-31 16:00:02.351 0.0 560827055.72 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 -5.6082707E8 -1 -560827055.72 0.0 11.0 false NULL 2a7V63IL7jK3o 11 -325931647 1969-12-31 16:00:02.351 0.0 325931620.72 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 -3.25931648E8 -1 -325931620.72 0.0 11.0 true NULL 10 11 92365813 1969-12-31 16:00:02.351 0.0 -92365839.28 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 9.2365808E7 -1 92365839.28 0.0 -21.0 NULL 15601.0 NULL 21 NULL 1969-12-31 16:00:14.256 0.0 NULL 15601.0 1241106.353 12.0 0.0 0.0 -23.0 -21 NULL NULL -2 NULL NULL +21.0 NULL 15601.0 NULL 21 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 12.0 0.0 0.0 -23.0 -21 NULL NULL -2 NULL NULL 32.0 NULL -200.0 NULL 32 NULL 1969-12-31 16:00:02.445 0.0 NULL -200.0 -15910.599999999999 1.0 0.0 0.0 -23.0 -32 NULL NULL -23 NULL NULL 36.0 NULL -200.0 NULL 36 NULL 1969-12-31 16:00:00.554 0.0 NULL -200.0 -15910.599999999999 33.0 0.0 0.0 -23.0 -36 NULL NULL -23 NULL NULL -5.0 NULL 15601.0 NULL 5 NULL 1969-12-31 16:00:00.959 0.0 NULL 15601.0 1241106.353 3.0 0.0 0.0 -23.0 -5 NULL NULL -3 NULL NULL +5.0 NULL 15601.0 NULL 5 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 3.0 0.0 0.0 -23.0 -5 NULL NULL -3 NULL NULL 58.0 NULL 15601.0 NULL 58 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 33.0 0.0 0.0 -23.0 -58 NULL NULL -23 NULL NULL 8.0 false NULL 10V3pN5r5lI2qWl2lG103 8 -362835731 1969-12-31 16:00:15.892 0.0 362835704.72 NULL NULL 1.0 0.0 0.0 NULL -8 0.0 -3.62835744E8 -7 -362835704.72 0.0 8.0 false NULL 10c4qt584m5y6uWT 8 -183000142 1969-12-31 16:00:15.892 0.0 183000115.72 NULL NULL 1.0 0.0 0.0 NULL -8 0.0 -1.8300016E8 -7 -183000115.72 0.0 diff --git ql/src/test/results/clientpositive/vector_count.q.out ql/src/test/results/clientpositive/vector_count.q.out index e829ad8..35e0c9d 100644 --- ql/src/test/results/clientpositive/vector_count.q.out +++ ql/src/test/results/clientpositive/vector_count.q.out @@ -74,7 +74,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: bigint) - Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col2) @@ -138,7 +137,6 @@ STAGE PLANS: sort order: ++++ Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: 
_col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint) - Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) diff --git ql/src/test/results/clientpositive/vector_grouping_sets.q.out ql/src/test/results/clientpositive/vector_grouping_sets.q.out index 58d1f87..5f296aa 100644 --- ql/src/test/results/clientpositive/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/vector_grouping_sets.q.out @@ -149,6 +149,7 @@ STAGE PLANS: outputColumnNames: s_store_id Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1 keys: s_store_id (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -158,6 +159,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int) @@ -224,6 +226,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1 keys: _col0 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -233,6 +236,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int) @@ -302,6 +306,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 12 Data size: 25632 Basic stats: COMPLETE Column stats: NONE Group By Operator + grouping sets: 0, 1 keys: _col0 (type: string), 0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -311,6 +316,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 24 Data size: 51264 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int) diff --git ql/src/test/results/clientpositive/vectorization_15.q.out ql/src/test/results/clientpositive/vectorization_15.q.out index 5de2092..969a74b 100644 --- ql/src/test/results/clientpositive/vectorization_15.q.out +++ ql/src/test/results/clientpositive/vectorization_15.q.out 
@@ -215,15 +215,15 @@ POSTHOOK: Input: default@alltypesorc -51.0 true NULL QiOcvR0kt6r7f0R7fiPxQTCU -51 266531954 1969-12-31 16:00:08.451 0.0 -266531980.28 NULL NULL 33.0 0.0 0.0 NULL 51 0.0 2.66532E8 -23 266531980.28 0.0 -51.0 true NULL Ybpj38RTTYl7CnJXPNx1g4C -51 -370919370 1969-12-31 16:00:08.451 0.0 370919343.72 NULL NULL 33.0 0.0 0.0 NULL 51 0.0 -3.70919296E8 -23 -370919343.72 0.0 -6.0 NULL -200.0 NULL -6 NULL 1969-12-31 15:59:56.094 0.0 NULL -200.0 -15910.599999999999 3.0 0.0 0.0 -23.0 6 NULL NULL -5 NULL NULL --62.0 NULL 15601.0 NULL -62 NULL 1969-12-31 16:00:09.889 0.0 NULL 15601.0 1241106.353 33.0 0.0 0.0 -23.0 62 NULL NULL -23 NULL NULL +-62.0 NULL 15601.0 NULL -62 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 33.0 0.0 0.0 -23.0 62 NULL NULL -23 NULL NULL 11.0 false NULL 10pO8p1LNx4Y 11 271296824 1969-12-31 16:00:02.351 0.0 -271296850.28 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 2.71296832E8 -1 271296850.28 0.0 11.0 false NULL 1H6wGP 11 -560827082 1969-12-31 16:00:02.351 0.0 560827055.72 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 -5.6082707E8 -1 -560827055.72 0.0 11.0 false NULL 2a7V63IL7jK3o 11 -325931647 1969-12-31 16:00:02.351 0.0 325931620.72 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 -3.25931648E8 -1 -325931620.72 0.0 11.0 true NULL 10 11 92365813 1969-12-31 16:00:02.351 0.0 -92365839.28 NULL NULL 0.0 0.0 0.0 NULL -11 0.0 9.2365808E7 -1 92365839.28 0.0 -21.0 NULL 15601.0 NULL 21 NULL 1969-12-31 16:00:14.256 0.0 NULL 15601.0 1241106.353 12.0 0.0 0.0 -23.0 -21 NULL NULL -2 NULL NULL +21.0 NULL 15601.0 NULL 21 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 12.0 0.0 0.0 -23.0 -21 NULL NULL -2 NULL NULL 32.0 NULL -200.0 NULL 32 NULL 1969-12-31 16:00:02.445 0.0 NULL -200.0 -15910.599999999999 1.0 0.0 0.0 -23.0 -32 NULL NULL -23 NULL NULL 36.0 NULL -200.0 NULL 36 NULL 1969-12-31 16:00:00.554 0.0 NULL -200.0 -15910.599999999999 33.0 0.0 0.0 -23.0 -36 NULL NULL -23 NULL NULL -5.0 NULL 15601.0 NULL 5 NULL 1969-12-31 16:00:00.959 0.0 NULL 15601.0 1241106.353 3.0 0.0 0.0 -23.0 -5 NULL NULL -3 NULL NULL +5.0 NULL 15601.0 NULL 5 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 3.0 0.0 0.0 -23.0 -5 NULL NULL -3 NULL NULL 58.0 NULL 15601.0 NULL 58 NULL 1969-12-31 15:59:56.527 0.0 NULL 15601.0 1241106.353 33.0 0.0 0.0 -23.0 -58 NULL NULL -23 NULL NULL 8.0 false NULL 10V3pN5r5lI2qWl2lG103 8 -362835731 1969-12-31 16:00:15.892 0.0 362835704.72 NULL NULL 1.0 0.0 0.0 NULL -8 0.0 -3.62835744E8 -7 -362835704.72 0.0 8.0 false NULL 10c4qt584m5y6uWT 8 -183000142 1969-12-31 16:00:15.892 0.0 183000115.72 NULL NULL 1.0 0.0 0.0 NULL -8 0.0 -1.8300016E8 -7 -183000115.72 0.0 diff --git ql/src/test/results/clientpositive/vectorization_limit.q.out ql/src/test/results/clientpositive/vectorization_limit.q.out index 39ea939..3466f03 100644 --- ql/src/test/results/clientpositive/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/vectorization_limit.q.out @@ -340,7 +340,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) diff --git ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out index f8ae962..21f933f 100644 --- ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out @@ -46,7 +46,6 @@ 
STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0) @@ -105,7 +104,6 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), avg(DISTINCT KEY._col0:2._col0), std(DISTINCT KEY._col0:3._col0)
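For reference, the pattern exercised by the new golden files above is GROUP BY with CUBE / GROUPING SETS combined with the grouping() UDF, a HAVING filter on grouping(), and a LIMIT. A minimal HiveQL sketch of that pattern follows; it is illustrative only and not part of the patch, and the table name t1_demo and its sample rows are assumed rather than taken from the test data files.

-- Illustrative sketch only; t1_demo and its rows are hypothetical.
CREATE TABLE t1_demo (key INT, value INT) STORED AS ORC;
INSERT INTO t1_demo VALUES (1, 1), (2, 2), (3, 3), (3, NULL), (4, 5);

-- CUBE(key, value) expands to the grouping sets {(key, value), (key), (value), ()}.
-- grouping(col) returns 1 on rows where col has been aggregated away, else 0,
-- so the HAVING clause below keeps only the rolled-up (subtotal) rows.
SELECT key,
       value,
       count(*) AS cnt,
       grouping(key) + grouping(value) AS x
FROM t1_demo
GROUP BY CUBE(key, value)
HAVING grouping(key) = 1 OR grouping(value) = 1
ORDER BY x DESC
LIMIT 10;

With vectorization enabled (hive.vectorized.execution.enabled=true), plans for queries of this shape are expected to show "Execution mode: vectorized, llap" on the map and reduce vertices, which is what the updated .q.out files assert.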