diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 96a03f6..92e79e5 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -142,6 +142,7 @@ minillap.shared.query.files=acid_globallimit.q,\
count.q,\
create_merge_compressed.q,\
cross_join.q,\
+ cross_prod_1.q,\
cross_product_check_1.q,\
cross_product_check_2.q,\
ctas.q,\
@@ -444,6 +445,7 @@ minillap.query.files=acid_bucket_pruning.q,\
bucket_map_join_tez2.q,\
bucketpruning1.q,\
constprog_dpp.q,\
+  cross_prod_1.q,\
dynamic_partition_pruning.q,\
dynamic_partition_pruning_2.q,\
explainuser_1.q,\
diff --git itests/src/test/resources/testconfiguration.properties.orig itests/src/test/resources/testconfiguration.properties.orig
new file mode 100644
index 0000000..a920ca9
--- /dev/null
+++ itests/src/test/resources/testconfiguration.properties.orig
@@ -0,0 +1,1377 @@
+# NOTE: files should be listed in alphabetical order
+minimr.query.files=auto_sortmerge_join_16.q,\
+ bucket4.q,\
+ bucket5.q,\
+ bucket6.q,\
+ bucket_many.q,\
+ bucket_num_reducers.q,\
+ bucket_num_reducers2.q,\
+ bucketizedhiveinputformat.q,\
+ bucketmapjoin6.q,\
+ bucketmapjoin7.q,\
+ disable_merge_for_bucketing.q,\
+ empty_dir_in_table.q,\
+ exchgpartition2lel.q,\
+ external_table_with_space_in_location_path.q,\
+ file_with_header_footer.q,\
+ groupby2.q,\
+ import_exported_table.q,\
+ index_bitmap3.q,\
+ index_bitmap_auto.q,\
+ infer_bucket_sort_bucketed_table.q,\
+ infer_bucket_sort_dyn_part.q,\
+ infer_bucket_sort_map_operators.q,\
+ infer_bucket_sort_merge.q,\
+ infer_bucket_sort_num_buckets.q,\
+ infer_bucket_sort_reducers_power_two.q,\
+ input16_cc.q,\
+ insert_dir_distcp.q,\
+ join1.q,\
+ join_acid_non_acid.q,\
+ leftsemijoin_mr.q,\
+ list_bucket_dml_10.q,\
+ load_fs2.q,\
+ load_hdfs_file_with_space_in_the_name.q,\
+ non_native_window_udf.q, \
+ parallel_orderby.q,\
+ quotedid_smb.q,\
+ reduce_deduplicate.q,\
+ remote_script.q,\
+ root_dir_external_table.q,\
+ schemeAuthority.q,\
+ schemeAuthority2.q,\
+ scriptfile1.q,\
+ scriptfile1_win.q,\
+ skewjoin_onesideskew.q,\
+ table_nonprintable.q,\
+ temp_table_external.q,\
+ truncate_column_buckets.q,\
+ uber_reduce.q,\
+ udf_using.q
+
+# These tests are disabled for minimr
+# ql_rewrite_gbtoidx.q,\
+# ql_rewrite_gbtoidx_cbo_1.q,\
+# ql_rewrite_gbtoidx_cbo_2.q,\
+# smb_mapjoin_8.q,\
+
+
+# Tests that are not enabled for CLI Driver
+disabled.query.files=ql_rewrite_gbtoidx.q,\
+ ql_rewrite_gbtoidx_cbo_1.q,\
+ ql_rewrite_gbtoidx_cbo_2.q,\
+ rcfile_merge1.q,\
+ smb_mapjoin_8.q
+
+# NOTE: Add tests to minitez only if it is very
+# specific to tez and cannot be added to minillap.
+minitez.query.files.shared=delete_orig_table.q,\
+ orc_merge12.q,\
+ orc_vectorization_ppd.q,\
+ unionDistinct_2.q,\
+ update_orig_table.q,\
+ vector_join_part_col_char.q,\
+ vector_non_string_partition.q,\
+ vectorization_div0.q,\
+ vectorization_limit.q
+
+# NOTE: Add tests to minitez only if it is very
+# specific to tez and cannot be added to minillap.
+minitez.query.files=explainuser_3.q,\
+ explainanalyze_1.q,\
+ explainanalyze_2.q,\
+ explainanalyze_3.q,\
+ explainanalyze_4.q,\
+ explainanalyze_5.q,\
+ hybridgrace_hashjoin_1.q,\
+ hybridgrace_hashjoin_2.q,\
+ partition_column_names_with_leading_and_trailing_spaces.q,\
+ stats_filemetadata.q,\
+ tez_union_with_udf.q
+
+minillap.shared.query.files=acid_globallimit.q,\
+ alter_merge_2_orc.q,\
+ alter_merge_orc.q,\
+ alter_merge_stats_orc.q,\
+ auto_join0.q,\
+ auto_join1.q,\
+ auto_join21.q,\
+ auto_join29.q,\
+ auto_join30.q,\
+ auto_join_filters.q,\
+ auto_join_nulls.q,\
+ auto_sortmerge_join_1.q,\
+ auto_sortmerge_join_10.q,\
+ auto_sortmerge_join_11.q,\
+ auto_sortmerge_join_12.q,\
+ auto_sortmerge_join_13.q,\
+ auto_sortmerge_join_14.q,\
+ auto_sortmerge_join_15.q,\
+ auto_sortmerge_join_16.q,\
+ auto_sortmerge_join_2.q,\
+ auto_sortmerge_join_3.q,\
+ auto_sortmerge_join_4.q,\
+ auto_sortmerge_join_5.q,\
+ auto_sortmerge_join_6.q,\
+ auto_sortmerge_join_7.q,\
+ auto_sortmerge_join_8.q,\
+ auto_sortmerge_join_9.q,\
+ bucket2.q,\
+ bucket3.q,\
+ bucket4.q,\
+ bucket_map_join_tez1.q,\
+ bucket_map_join_tez2.q,\
+ cbo_gby.q,\
+ cbo_gby_empty.q,\
+ cbo_join.q,\
+ cbo_limit.q,\
+ cbo_semijoin.q,\
+ cbo_simple_select.q,\
+ cbo_stats.q,\
+ cbo_subq_exists.q,\
+ cbo_subq_in.q,\
+ cbo_subq_not_in.q,\
+ cbo_udf_udaf.q,\
+ cbo_union.q,\
+ cbo_views.q,\
+ cbo_windowing.q,\
+ column_names_with_leading_and_trailing_spaces.q,\
+ constprog_dpp.q,\
+ constprog_semijoin.q,\
+ correlationoptimizer1.q,\
+ count.q,\
+ create_merge_compressed.q,\
+ cross_join.q,\
+ cross_product_check_1.q,\
+ cross_product_check_2.q,\
+ ctas.q,\
+ cte_1.q,\
+ cte_2.q,\
+ cte_3.q,\
+ cte_4.q,\
+ cte_5.q,\
+ cte_mat_1.q,\
+ cte_mat_2.q,\
+ cte_mat_3.q,\
+ cte_mat_4.q,\
+ cte_mat_5.q,\
+ custom_input_output_format.q,\
+ deleteAnalyze.q,\
+ delete_all_non_partitioned.q,\
+ delete_all_partitioned.q,\
+ delete_tmp_table.q,\
+ delete_where_no_match.q,\
+ delete_where_non_partitioned.q,\
+ delete_where_partitioned.q,\
+ delete_whole_partition.q,\
+ disable_merge_for_bucketing.q,\
+ dynamic_partition_pruning.q,\
+ dynamic_partition_pruning_2.q,\
+ dynpart_sort_opt_vectorization.q,\
+ dynpart_sort_optimization.q,\
+ dynpart_sort_optimization2.q,\
+ empty_join.q,\
+ enforce_order.q,\
+ filter_join_breaktask.q,\
+ filter_join_breaktask2.q,\
+ groupby1.q,\
+ groupby2.q,\
+ groupby3.q,\
+ having.q,\
+ identity_project_remove_skip.q,\
+ insert1.q,\
+ insert_into1.q,\
+ insert_into2.q,\
+ insert_orig_table.q,\
+ insert_update_delete.q,\
+ insert_values_dynamic_partitioned.q,\
+ insert_values_non_partitioned.q,\
+  insert_values_orig_table.q,\
+ insert_values_partitioned.q,\
+ insert_values_tmp_table.q,\
+ join0.q,\
+ join1.q,\
+ join_nullsafe.q,\
+ leftsemijoin.q,\
+ limit_pushdown.q,\
+ llap_nullscan.q,\
+ llapdecider.q,\
+ load_dyn_part1.q,\
+ load_dyn_part2.q,\
+ load_dyn_part3.q,\
+ lvj_mapjoin.q,\
+ mapjoin2.q,\
+ mapjoin_decimal.q,\
+ mapjoin_mapjoin.q,\
+ mapreduce1.q,\
+ mapreduce2.q,\
+ merge1.q,\
+ merge2.q,\
+ mergejoin.q,\
+ metadata_only_queries.q,\
+ metadata_only_queries_with_filters.q,\
+ metadataonly1.q,\
+ mrr.q,\
+ nonmr_fetch_threshold.q,\
+ optimize_nullscan.q,\
+ orc_analyze.q,\
+ orc_merge1.q,\
+ orc_merge10.q,\
+ orc_merge11.q,\
+ orc_merge2.q,\
+ orc_merge3.q,\
+ orc_merge4.q,\
+ orc_merge5.q,\
+ orc_merge6.q,\
+ orc_merge7.q,\
+ orc_merge8.q,\
+ orc_merge9.q,\
+ orc_merge_diff_fs.q,\
+ orc_merge_incompat1.q,\
+ orc_merge_incompat2.q,\
+ orc_merge_incompat3.q,\
+ orc_ppd_basic.q,\
+ orc_ppd_schema_evol_1a.q,\
+ orc_ppd_schema_evol_1b.q,\
+ orc_ppd_schema_evol_2a.q,\
+ orc_ppd_schema_evol_2b.q,\
+ orc_ppd_schema_evol_3a.q,\
+ order_null.q,\
+ parallel.q,\
+ ptf.q,\
+ ptf_matchpath.q,\
+ ptf_streaming.q,\
+ sample1.q,\
+ script_env_var1.q,\
+ script_env_var2.q,\
+ script_pipe.q,\
+ scriptfile1.q,\
+ selectDistinctStar.q,\
+ select_dummy_source.q,\
+ skewjoin.q,\
+ stats_noscan_1.q,\
+ stats_only_null.q,\
+ subquery_exists.q,\
+ subquery_in.q,\
+ temp_table.q,\
+ tez_bmj_schema_evolution.q,\
+ tez_dml.q,\
+ tez_dynpart_hashjoin_1.q,\
+ tez_dynpart_hashjoin_2.q,\
+ tez_fsstat.q,\
+ tez_insert_overwrite_local_directory_1.q,\
+ tez_join.q,\
+ tez_join_hash.q,\
+ tez_join_result_complex.q,\
+ tez_join_tests.q,\
+ tez_joins_explain.q,\
+ tez_multi_union.q,\
+ tez_schema_evolution.q,\
+ tez_self_join.q,\
+ tez_smb_1.q,\
+ tez_smb_main.q,\
+ tez_union.q,\
+ tez_union2.q,\
+ tez_union_decimal.q,\
+ tez_union_dynamic_partition.q,\
+ tez_union_group_by.q,\
+ tez_union_multiinsert.q,\
+ tez_union_view.q,\
+ tez_vector_dynpart_hashjoin_1.q,\
+ tez_vector_dynpart_hashjoin_2.q,\
+ transform1.q,\
+ transform2.q,\
+ transform_ppr1.q,\
+ transform_ppr2.q,\
+ union2.q,\
+ union3.q,\
+ union4.q,\
+ union5.q,\
+ union6.q,\
+ union7.q,\
+ union8.q,\
+ union9.q,\
+ unionDistinct_1.q,\
+ union_fast_stats.q,\
+ union_stats.q,\
+ union_type_chk.q,\
+ update_after_multiple_inserts.q,\
+ update_all_non_partitioned.q,\
+ update_all_partitioned.q,\
+ update_all_types.q,\
+ update_tmp_table.q,\
+ update_two_cols.q,\
+ update_where_no_match.q,\
+ update_where_non_partitioned.q,\
+ update_where_partitioned.q,\
+ vector_acid3.q,\
+ vector_aggregate_9.q,\
+ vector_aggregate_without_gby.q,\
+ vector_auto_smb_mapjoin_14.q,\
+ vector_between_columns.q,\
+ vector_between_in.q,\
+ vector_binary_join_groupby.q,\
+ vector_bround.q,\
+ vector_bucket.q,\
+ vector_cast_constant.q,\
+ vector_char_2.q,\
+ vector_char_4.q,\
+ vector_char_cast.q,\
+ vector_char_mapjoin1.q,\
+ vector_char_simple.q,\
+ vector_coalesce.q,\
+ vector_coalesce_2.q,\
+ vector_complex_all.q,\
+ vector_complex_join.q,\
+ vector_count.q,\
+ vector_count_distinct.q,\
+ vector_data_types.q,\
+ vector_date_1.q,\
+ vector_decimal_1.q,\
+ vector_decimal_10_0.q,\
+ vector_decimal_2.q,\
+ vector_decimal_3.q,\
+ vector_decimal_4.q,\
+ vector_decimal_5.q,\
+ vector_decimal_6.q,\
+ vector_decimal_aggregate.q,\
+ vector_decimal_cast.q,\
+ vector_decimal_expressions.q,\
+ vector_decimal_mapjoin.q,\
+ vector_decimal_math_funcs.q,\
+ vector_decimal_precision.q,\
+ vector_decimal_round.q,\
+ vector_decimal_round_2.q,\
+ vector_decimal_trailing.q,\
+ vector_decimal_udf.q,\
+ vector_decimal_udf2.q,\
+ vector_distinct_2.q,\
+ vector_elt.q,\
+ vector_groupby4.q,\
+ vector_groupby6.q,\
+ vector_groupby_3.q,\
+ vector_groupby_mapjoin.q,\
+ vector_groupby_reduce.q,\
+ vector_grouping_sets.q,\
+ vector_if_expr.q,\
+ vector_include_no_sel.q,\
+ vector_inner_join.q,\
+ vector_interval_1.q,\
+ vector_interval_2.q,\
+ vector_interval_arithmetic.q,\
+ vector_interval_mapjoin.q,\
+ vector_join30.q,\
+ vector_join_filters.q,\
+ vector_join_nulls.q,\
+ vector_left_outer_join.q,\
+ vector_left_outer_join2.q,\
+ vector_leftsemi_mapjoin.q,\
+ vector_mapjoin_reduce.q,\
+ vector_mr_diff_schema_alias.q,\
+ vector_multi_insert.q,\
+ vector_null_projection.q,\
+ vector_nullsafe_join.q,\
+ vector_nvl.q,\
+ vector_orderby_5.q,\
+ vector_outer_join0.q,\
+ vector_outer_join1.q,\
+ vector_outer_join2.q,\
+ vector_outer_join3.q,\
+ vector_outer_join4.q,\
+ vector_outer_join5.q,\
+ vector_outer_join6.q,\
+ vector_partition_diff_num_cols.q,\
+ vector_partitioned_date_time.q,\
+ vector_reduce1.q,\
+ vector_reduce2.q,\
+ vector_reduce3.q,\
+ vector_reduce_groupby_decimal.q,\
+ vector_string_concat.q,\
+ vector_struct_in.q,\
+ vector_varchar_4.q,\
+ vector_varchar_mapjoin1.q,\
+ vector_varchar_simple.q,\
+ vector_when_case_null.q,\
+ vectorization_0.q,\
+ vectorization_1.q,\
+ vectorization_10.q,\
+ vectorization_11.q,\
+ vectorization_12.q,\
+ vectorization_13.q,\
+ vectorization_14.q,\
+ vectorization_15.q,\
+ vectorization_16.q,\
+ vectorization_17.q,\
+ vectorization_2.q,\
+ vectorization_3.q,\
+ vectorization_4.q,\
+ vectorization_5.q,\
+ vectorization_6.q,\
+ vectorization_7.q,\
+ vectorization_8.q,\
+ vectorization_9.q,\
+ vectorization_decimal_date.q,\
+ vectorization_nested_udf.q,\
+ vectorization_not.q,\
+ vectorization_part.q,\
+ vectorization_part_project.q,\
+ vectorization_part_varchar.q,\
+ vectorization_pushdown.q,\
+ vectorization_short_regress.q,\
+ vectorized_bucketmapjoin1.q,\
+ vectorized_case.q,\
+ vectorized_casts.q,\
+ vectorized_context.q,\
+ vectorized_date_funcs.q,\
+ vectorized_distinct_gby.q,\
+ vectorized_dynamic_partition_pruning.q,\
+ vectorized_mapjoin.q,\
+ vectorized_math_funcs.q,\
+ vectorized_nested_mapjoin.q,\
+ vectorized_parquet.q,\
+ vectorized_parquet_types.q,\
+ vectorized_ptf.q,\
+ vectorized_rcfile_columnar.q,\
+ vectorized_shufflejoin.q,\
+ vectorized_string_funcs.q,\
+ vectorized_timestamp.q,\
+ vectorized_timestamp_funcs.q,\
+ vectorized_timestamp_ints_casts.q
+
+minillap.query.files=acid_bucket_pruning.q,\
+ acid_vectorization_missing_cols.q,\
+ bucket_map_join_tez1.q,\
+ bucket_map_join_tez2.q,\
+ bucketpruning1.q,\
+ constprog_dpp.q,\
+ dynamic_partition_pruning.q,\
+ dynamic_partition_pruning_2.q,\
+ explainuser_1.q,\
+ explainuser_2.q,\
+ explainuser_4.q,\
+ hybridgrace_hashjoin_1.q,\
+ hybridgrace_hashjoin_2.q,\
+ llap_nullscan.q,\
+ llap_udf.q,\
+ llapdecider.q,\
+ lvj_mapjoin.q,\
+ mapjoin_decimal.q,\
+ mergejoin_3way.q,\
+ mrr.q,\
+ orc_llap.q,\
+ orc_llap_counters.q,\
+ orc_llap_counters1.q,\
+ orc_llap_nonvector.q,\
+ orc_ppd_basic.q,\
+ schema_evol_orc_acid_part.q,\
+ schema_evol_orc_acid_part_update.q,\
+ schema_evol_orc_acid_table.q,\
+ schema_evol_orc_acid_table_update.q,\
+ schema_evol_orc_acidvec_part.q,\
+ schema_evol_orc_acidvec_part_update.q,\
+ schema_evol_orc_acidvec_table.q,\
+ schema_evol_orc_acidvec_table_update.q,\
+ schema_evol_orc_nonvec_part.q,\
+ schema_evol_orc_nonvec_part_all_complex.q,\
+ schema_evol_orc_nonvec_part_all_primitive.q,\
+ schema_evol_orc_nonvec_table.q,\
+ schema_evol_orc_vec_part.q,\
+ schema_evol_orc_vec_part_all_complex.q,\
+ schema_evol_orc_vec_part_all_primitive.q,\
+ schema_evol_orc_vec_table.q,\
+ schema_evol_stats.q,\
+ schema_evol_text_nonvec_part.q,\
+ schema_evol_text_nonvec_part_all_complex.q,\
+ schema_evol_text_nonvec_part_all_primitive.q,\
+ schema_evol_text_nonvec_table.q,\
+ schema_evol_text_vec_part.q,\
+ schema_evol_text_vec_part_all_complex.q,\
+ schema_evol_text_vec_part_all_primitive.q,\
+ schema_evol_text_vec_table.q,\
+ schema_evol_text_vecrow_part.q,\
+ schema_evol_text_vecrow_part_all_complex.q,\
+ schema_evol_text_vecrow_part_all_primitive.q,\
+ schema_evol_text_vecrow_table.q,\
+ smb_cache.q,\
+ tez_aggr_part_stats.q,\
+ tez_bmj_schema_evolution.q,\
+ tez_dml.q,\
+ tez_dynpart_hashjoin_1.q,\
+ tez_dynpart_hashjoin_2.q,\
+ tez_dynpart_hashjoin_3.q,\
+ tez_fsstat.q,\
+ tez_insert_overwrite_local_directory_1.q,\
+ tez_join.q,\
+ tez_join_result_complex.q,\
+ tez_join_tests.q,\
+ tez_joins_explain.q,\
+ tez_multi_union.q,\
+ tez_schema_evolution.q,\
+ tez_self_join.q,\
+ tez_smb_1.q,\
+ tez_smb_empty.q,\
+ tez_smb_main.q,\
+ tez_union.q,\
+ tez_union2.q,\
+ tez_union_decimal.q,\
+ tez_union_dynamic_partition.q,\
+ tez_union_group_by.q,\
+ tez_union_multiinsert.q,\
+ tez_union_view.q,\
+ tez_vector_dynpart_hashjoin_1.q,\
+ tez_vector_dynpart_hashjoin_2.q,\
+ vectorized_dynamic_partition_pruning.q,\
+ windowing_gby.q
+
+encrypted.query.files=encryption_join_unencrypted_tbl.q,\
+ encryption_insert_partition_static.q,\
+ encryption_insert_partition_dynamic.q,\
+ encryption_join_with_different_encryption_keys.q,\
+ encryption_select_read_only_encrypted_tbl.q,\
+ encryption_select_read_only_unencrypted_tbl.q,\
+ encryption_load_data_to_encrypted_tables.q, \
+ encryption_unencrypted_nonhdfs_external_tables.q \
+ encryption_move_tbl.q \
+ encryption_drop_table.q \
+ encryption_insert_values.q \
+ encryption_drop_view.q \
+ encryption_drop_partition.q \
+ encryption_with_trash.q \
+ encryption_ctas.q
+
+beeline.positive.exclude=add_part_exist.q,\
+ alter1.q,\
+ alter2.q,\
+ alter4.q,\
+ alter5.q,\
+ alter_rename_partition.q,\
+ alter_rename_partition_authorization.q,\
+ archive.q,\
+ archive_corrupt.q,\
+ archive_mr_1806.q,\
+ archive_multi.q,\
+ archive_multi_mr_1806.q,\
+ authorization_1.q,\
+ authorization_2.q,\
+ authorization_4.q,\
+ authorization_5.q,\
+ authorization_6.q,\
+ authorization_7.q,\
+ ba_table1.q,\
+ ba_table2.q,\
+ ba_table3.q,\
+ ba_table_udfs.q,\
+ binary_table_bincolserde.q,\
+ binary_table_colserde.q,\
+ cluster.q,\
+ columnarserde_create_shortcut.q,\
+ combine2.q,\
+ constant_prop.q,\
+ create_nested_type.q,\
+ create_or_replace_view.q,\
+ create_struct_table.q,\
+ create_union_table.q,\
+ database.q,\
+ database_location.q,\
+ database_properties.q,\
+ describe_database_json.q,\
+ drop_database_removes_partition_dirs.q,\
+ escape1.q,\
+ escape2.q,\
+ exim_00_nonpart_empty.q,\
+ exim_01_nonpart.q,\
+ exim_02_00_part_empty.q,\
+ exim_02_part.q,\
+ exim_03_nonpart_over_compat.q,\
+ exim_04_all_part.q,\
+ exim_04_evolved_parts.q,\
+ exim_05_some_part.q,\
+ exim_06_one_part.q,\
+ exim_07_all_part_over_nonoverlap.q,\
+ exim_08_nonpart_rename.q,\
+ exim_09_part_spec_nonoverlap.q,\
+ exim_10_external_managed.q,\
+ exim_11_managed_external.q,\
+ exim_12_external_location.q,\
+ exim_13_managed_location.q,\
+ exim_14_managed_location_over_existing.q,\
+ exim_15_external_part.q,\
+ exim_16_part_external.q,\
+ exim_17_part_managed.q,\
+ exim_18_part_external.q,\
+ exim_19_00_part_external_location.q,\
+ exim_19_part_external_location.q,\
+ exim_20_part_managed_location.q,\
+ exim_21_export_authsuccess.q,\
+ exim_22_import_exist_authsuccess.q,\
+ exim_23_import_part_authsuccess.q,\
+ exim_24_import_nonexist_authsuccess.q,\
+ global_limit.q,\
+ groupby_complex_types.q,\
+ groupby_complex_types_multi_single_reducer.q,\
+ index_auth.q,\
+ index_auto.q,\
+ index_auto_empty.q,\
+ index_bitmap.q,\
+ index_bitmap1.q,\
+ index_bitmap2.q,\
+ index_bitmap3.q,\
+ index_bitmap_auto.q,\
+ index_bitmap_rc.q,\
+ index_compact.q,\
+ index_compact_1.q,\
+ index_compact_2.q,\
+ index_compact_3.q,\
+ index_stale_partitioned.q,\
+ init_file.q,\
+ input16.q,\
+ input16_cc.q,\
+ input46.q,\
+ input_columnarserde.q,\
+ input_dynamicserde.q,\
+ input_lazyserde.q,\
+ input_testxpath3.q,\
+ input_testxpath4.q,\
+ insert2_overwrite_partitions.q,\
+ insertexternal1.q,\
+ join_thrift.q,\
+ lateral_view.q,\
+ load_binary_data.q,\
+ load_exist_part_authsuccess.q,\
+ load_nonpart_authsuccess.q,\
+ load_part_authsuccess.q,\
+ loadpart_err.q,\
+ lock1.q,\
+ lock2.q,\
+ lock3.q,\
+ lock4.q,\
+ merge_dynamic_partition.q,\
+ multi_insert.q,\
+ multi_insert_move_tasks_share_dependencies.q,\
+ null_column.q,\
+ ppd_clusterby.q,\
+ query_with_semi.q,\
+ rename_column.q,\
+ sample6.q,\
+ sample_islocalmode_hook.q,\
+ set_processor_namespaces.q,\
+ show_tables.q,\
+ source.q,\
+ split_sample.q,\
+ str_to_map.q,\
+ transform1.q,\
+ udaf_collect_set.q,\
+ udaf_context_ngrams.q,\
+ udaf_histogram_numeric.q,\
+ udaf_ngrams.q,\
+ udaf_percentile_approx.q,\
+ udf_array.q,\
+ udf_bitmap_and.q,\
+ udf_bitmap_or.q,\
+ udf_explode.q,\
+ udf_format_number.q,\
+ udf_map.q,\
+ udf_map_keys.q,\
+ udf_map_values.q,\
+ udf_mask.q,\
+ udf_mask_first_n.q,\
+ udf_mask_hash.q,\
+ udf_mask_last_n.q,\
+ udf_mask_show_first_n.q,\
+ udf_mask_show_last_n.q,\
+ udf_max.q,\
+ udf_min.q,\
+ udf_named_struct.q,\
+ udf_percentile.q,\
+ udf_printf.q,\
+ udf_sentences.q,\
+ udf_sort_array.q,\
+ udf_split.q,\
+ udf_struct.q,\
+ udf_substr.q,\
+ udf_translate.q,\
+ udf_union.q,\
+ udf_xpath.q,\
+ udtf_stack.q,\
+ view.q,\
+ virtual_column.q
+
+minimr.query.negative.files=cluster_tasklog_retrieval.q,\
+ file_with_header_footer_negative.q,\
+ local_mapred_error_cache.q,\
+ mapreduce_stack_trace.q,\
+ mapreduce_stack_trace_hadoop20.q,\
+ mapreduce_stack_trace_turnoff.q,\
+ mapreduce_stack_trace_turnoff_hadoop20.q,\
+ minimr_broken_pipe.q,\
+ table_nonprintable_negative.q,\
+ udf_local_resource.q
+
+# tests are sorted use: perl -pe 's@\\\s*\n@ @g' testconfiguration.properties \
+# | awk -F= '/spark.query.files/{print $2}' | perl -pe 's@.q *, *@\n@g' \
+# | egrep -v '^ *$' | sort -V | uniq | perl -pe 's@\n@.q, \\\n@g' | perl -pe 's@^@ @g'
+spark.query.files=add_part_multiple.q, \
+ alter_merge_orc.q, \
+ alter_merge_stats_orc.q, \
+ annotate_stats_join.q, \
+ auto_join0.q, \
+ auto_join1.q, \
+ auto_join10.q, \
+ auto_join11.q, \
+ auto_join12.q, \
+ auto_join13.q, \
+ auto_join14.q, \
+ auto_join15.q, \
+ auto_join16.q, \
+ auto_join17.q, \
+ auto_join18.q, \
+ auto_join18_multi_distinct.q, \
+ auto_join19.q, \
+ auto_join2.q, \
+ auto_join20.q, \
+ auto_join21.q, \
+ auto_join22.q, \
+ auto_join23.q, \
+ auto_join24.q, \
+ auto_join26.q, \
+ auto_join27.q, \
+ auto_join28.q, \
+ auto_join29.q, \
+ auto_join3.q, \
+ auto_join30.q, \
+ auto_join31.q, \
+ auto_join4.q, \
+ auto_join5.q, \
+ auto_join6.q, \
+ auto_join7.q, \
+ auto_join8.q, \
+ auto_join9.q, \
+ auto_join_filters.q, \
+ auto_join_nulls.q, \
+ auto_join_reordering_values.q, \
+ auto_join_stats.q, \
+ auto_join_stats2.q, \
+ auto_join_without_localtask.q, \
+ auto_smb_mapjoin_14.q, \
+ auto_sortmerge_join_1.q, \
+ auto_sortmerge_join_10.q, \
+ auto_sortmerge_join_12.q, \
+ auto_sortmerge_join_13.q, \
+ auto_sortmerge_join_14.q, \
+ auto_sortmerge_join_15.q, \
+ auto_sortmerge_join_16.q, \
+ auto_sortmerge_join_3.q, \
+ auto_sortmerge_join_4.q, \
+ auto_sortmerge_join_5.q, \
+ auto_sortmerge_join_6.q, \
+ auto_sortmerge_join_7.q, \
+ auto_sortmerge_join_8.q, \
+ auto_sortmerge_join_9.q, \
+ avro_compression_enabled_native.q, \
+ avro_decimal_native.q, \
+ avro_joins.q, \
+ avro_joins_native.q, \
+ bucket2.q, \
+ bucket3.q, \
+ bucket4.q, \
+ bucket_map_join_1.q, \
+ bucket_map_join_2.q, \
+ bucket_map_join_spark1.q, \
+ bucket_map_join_spark2.q, \
+ bucket_map_join_spark3.q, \
+ bucket_map_join_spark4.q, \
+ bucket_map_join_tez1.q, \
+ bucket_map_join_tez2.q, \
+ bucketmapjoin1.q, \
+ bucketmapjoin10.q, \
+ bucketmapjoin11.q, \
+ bucketmapjoin12.q, \
+ bucketmapjoin13.q, \
+ bucketmapjoin2.q, \
+ bucketmapjoin3.q, \
+ bucketmapjoin4.q, \
+ bucketmapjoin5.q, \
+ bucketmapjoin7.q, \
+ bucketmapjoin8.q, \
+ bucketmapjoin9.q, \
+ bucketmapjoin_negative.q, \
+ bucketmapjoin_negative2.q, \
+ bucketmapjoin_negative3.q, \
+ bucketsortoptimize_insert_2.q, \
+ bucketsortoptimize_insert_4.q, \
+ bucketsortoptimize_insert_6.q, \
+ bucketsortoptimize_insert_7.q, \
+ bucketsortoptimize_insert_8.q, \
+ cbo_gby.q, \
+ cbo_gby_empty.q, \
+ cbo_limit.q, \
+ cbo_semijoin.q, \
+ cbo_simple_select.q, \
+ cbo_stats.q, \
+ cbo_subq_in.q, \
+ cbo_subq_not_in.q, \
+ cbo_udf_udaf.q, \
+ cbo_union.q, \
+ column_access_stats.q, \
+ count.q, \
+ create_merge_compressed.q, \
+ cross_join.q, \
+ cross_product_check_1.q, \
+ cross_product_check_2.q, \
+ ctas.q, \
+ custom_input_output_format.q, \
+ date_join1.q, \
+ date_udf.q, \
+ decimal_1_1.q, \
+ decimal_join.q, \
+ disable_merge_for_bucketing.q, \
+ dynamic_rdd_cache.q, \
+ enforce_order.q, \
+ escape_clusterby1.q, \
+ escape_distributeby1.q, \
+ escape_orderby1.q, \
+ escape_sortby1.q, \
+ filter_join_breaktask.q, \
+ filter_join_breaktask2.q, \
+ groupby1.q, \
+ groupby10.q, \
+ groupby11.q, \
+ groupby1_map.q, \
+ groupby1_map_nomap.q, \
+ groupby1_map_skew.q, \
+ groupby1_noskew.q, \
+ groupby2.q, \
+ groupby2_map.q, \
+ groupby2_map_multi_distinct.q, \
+ groupby2_map_skew.q, \
+ groupby2_noskew.q, \
+ groupby2_noskew_multi_distinct.q, \
+ groupby3.q, \
+ groupby3_map.q, \
+ groupby3_map_multi_distinct.q, \
+ groupby3_map_skew.q, \
+ groupby3_noskew.q, \
+ groupby3_noskew_multi_distinct.q, \
+ groupby4.q, \
+ groupby4_map.q, \
+ groupby4_map_skew.q, \
+ groupby4_noskew.q, \
+ groupby5.q, \
+ groupby5_map.q, \
+ groupby5_map_skew.q, \
+ groupby5_noskew.q, \
+ groupby6.q, \
+ groupby6_map.q, \
+ groupby6_map_skew.q, \
+ groupby6_noskew.q, \
+ groupby7.q, \
+ groupby7_map.q, \
+ groupby7_map_multi_single_reducer.q, \
+ groupby7_map_skew.q, \
+ groupby7_noskew.q, \
+ groupby7_noskew_multi_single_reducer.q, \
+ groupby8.q, \
+ groupby8_map.q, \
+ groupby8_map_skew.q, \
+ groupby8_noskew.q, \
+ groupby9.q, \
+ groupby_bigdata.q, \
+ groupby_complex_types.q, \
+ groupby_complex_types_multi_single_reducer.q, \
+ groupby_cube1.q, \
+ groupby_grouping_id2.q, \
+ groupby_map_ppr.q, \
+ groupby_map_ppr_multi_distinct.q, \
+ groupby_multi_insert_common_distinct.q, \
+ groupby_multi_single_reducer.q, \
+ groupby_multi_single_reducer2.q, \
+ groupby_multi_single_reducer3.q, \
+ groupby_position.q, \
+ groupby_ppr.q, \
+ groupby_ppr_multi_distinct.q, \
+ groupby_resolution.q, \
+ groupby_rollup1.q, \
+ groupby_sort_1_23.q, \
+ groupby_sort_skew_1.q, \
+ groupby_sort_skew_1_23.q, \
+ qroupby_limit_extrastep.q, \
+ having.q, \
+ identity_project_remove_skip.q, \
+ index_auto_self_join.q, \
+ innerjoin.q, \
+ input12.q, \
+ input13.q, \
+ input14.q, \
+ input17.q, \
+ input18.q, \
+ input1_limit.q, \
+ input_part2.q, \
+ insert_into1.q, \
+ insert_into2.q, \
+ insert_into3.q, \
+ join0.q, \
+ join1.q, \
+ join10.q, \
+ join11.q, \
+ join12.q, \
+ join13.q, \
+ join14.q, \
+ join15.q, \
+ join16.q, \
+ join17.q, \
+ join18.q, \
+ join18_multi_distinct.q, \
+ join19.q, \
+ join2.q, \
+ join20.q, \
+ join21.q, \
+ join22.q, \
+ join23.q, \
+ join24.q, \
+ join25.q, \
+ join26.q, \
+ join27.q, \
+ join28.q, \
+ join29.q, \
+ join3.q, \
+ join30.q, \
+ join31.q, \
+ join32.q, \
+ join32_lessSize.q, \
+ join33.q, \
+ join34.q, \
+ join35.q, \
+ join36.q, \
+ join37.q, \
+ join38.q, \
+ join39.q, \
+ join4.q, \
+ join41.q, \
+ join5.q, \
+ join6.q, \
+ join7.q, \
+ join8.q, \
+ join9.q, \
+ join_1to1.q, \
+ join_alt_syntax.q, \
+ join_array.q, \
+ join_casesensitive.q, \
+ join_cond_pushdown_1.q, \
+ join_cond_pushdown_2.q, \
+ join_cond_pushdown_3.q, \
+ join_cond_pushdown_4.q, \
+ join_cond_pushdown_unqual1.q, \
+ join_cond_pushdown_unqual2.q, \
+ join_cond_pushdown_unqual3.q, \
+ join_cond_pushdown_unqual4.q, \
+ join_filters_overlap.q, \
+ join_hive_626.q, \
+ join_literals.q, \
+ join_map_ppr.q, \
+ join_merge_multi_expressions.q, \
+ join_merging.q, \
+ join_nullsafe.q, \
+ join_rc.q, \
+ join_reorder.q, \
+ join_reorder2.q, \
+ join_reorder3.q, \
+ join_reorder4.q, \
+ join_star.q, \
+ join_thrift.q, \
+ join_vc.q, \
+ join_view.q, \
+ lateral_view_explode2.q, \
+ leftsemijoin.q, \
+ leftsemijoin_mr.q, \
+ limit_partition_metadataonly.q, \
+ limit_pushdown.q, \
+ list_bucket_dml_2.q, \
+ load_dyn_part1.q, \
+ load_dyn_part10.q, \
+ load_dyn_part11.q, \
+ load_dyn_part12.q, \
+ load_dyn_part13.q, \
+ load_dyn_part14.q, \
+ load_dyn_part15.q, \
+ load_dyn_part2.q, \
+ load_dyn_part3.q, \
+ load_dyn_part4.q, \
+ load_dyn_part5.q, \
+ load_dyn_part6.q, \
+ load_dyn_part7.q, \
+ load_dyn_part8.q, \
+ load_dyn_part9.q, \
+ louter_join_ppr.q, \
+ mapjoin1.q, \
+ mapjoin_addjar.q, \
+ mapjoin_decimal.q, \
+ mapjoin_distinct.q, \
+ mapjoin_filter_on_outerjoin.q, \
+ mapjoin_mapjoin.q, \
+ mapjoin_memcheck.q, \
+ mapjoin_subquery.q, \
+ mapjoin_subquery2.q, \
+ mapjoin_test_outer.q, \
+ mapreduce1.q, \
+ mapreduce2.q, \
+ merge1.q, \
+ merge2.q, \
+ mergejoins.q, \
+ mergejoins_mixed.q, \
+ metadata_only_queries.q, \
+ metadata_only_queries_with_filters.q, \
+ multi_insert.q, \
+ multi_insert_gby.q, \
+ multi_insert_gby2.q, \
+ multi_insert_gby3.q, \
+ multi_insert_lateral_view.q, \
+ multi_insert_mixed.q, \
+ multi_insert_move_tasks_share_dependencies.q, \
+ multi_insert_with_join.q, \
+ multi_join_union.q, \
+ multi_join_union_src.q, \
+ multigroupby_singlemr.q, \
+ nullgroup.q, \
+ nullgroup2.q, \
+ nullgroup4.q, \
+ nullgroup4_multi_distinct.q, \
+ optimize_nullscan.q, \
+ order.q, \
+ order2.q, \
+ outer_join_ppr.q, \
+ parallel.q, \
+ parallel_join0.q, \
+ parallel_join1.q, \
+ parquet_join.q, \
+ pcr.q, \
+ ppd_gby_join.q, \
+ ppd_join.q, \
+ ppd_join2.q, \
+ ppd_join3.q, \
+ ppd_join5.q, \
+ ppd_join_filter.q, \
+ ppd_multi_insert.q, \
+ ppd_outer_join1.q, \
+ ppd_outer_join2.q, \
+ ppd_outer_join3.q, \
+ ppd_outer_join4.q, \
+ ppd_outer_join5.q, \
+ ppd_transform.q, \
+ ptf.q, \
+ ptf_decimal.q, \
+ ptf_general_queries.q, \
+ ptf_matchpath.q, \
+ ptf_rcfile.q, \
+ ptf_register_tblfn.q, \
+ ptf_seqfile.q, \
+ ptf_streaming.q, \
+ rcfile_bigdata.q, \
+ reduce_deduplicate_exclude_join.q, \
+ router_join_ppr.q, \
+ runtime_skewjoin_mapjoin_spark.q, \
+ sample1.q, \
+ sample10.q, \
+ sample2.q, \
+ sample3.q, \
+ sample4.q, \
+ sample5.q, \
+ sample6.q, \
+ sample7.q, \
+ sample8.q, \
+ sample9.q, \
+ script_env_var1.q, \
+ script_env_var2.q, \
+ script_pipe.q, \
+ scriptfile1.q, \
+ semijoin.q, \
+ skewjoin.q, \
+ skewjoin_noskew.q, \
+ skewjoin_union_remove_1.q, \
+ skewjoin_union_remove_2.q, \
+ skewjoinopt1.q, \
+ skewjoinopt10.q, \
+ skewjoinopt11.q, \
+ skewjoinopt12.q, \
+ skewjoinopt13.q, \
+ skewjoinopt14.q, \
+ skewjoinopt15.q, \
+ skewjoinopt16.q, \
+ skewjoinopt17.q, \
+ skewjoinopt18.q, \
+ skewjoinopt19.q, \
+ skewjoinopt2.q, \
+ skewjoinopt20.q, \
+ skewjoinopt3.q, \
+ skewjoinopt4.q, \
+ skewjoinopt5.q, \
+ skewjoinopt6.q, \
+ skewjoinopt7.q, \
+ skewjoinopt8.q, \
+ skewjoinopt9.q, \
+ smb_mapjoin_1.q, \
+ smb_mapjoin_10.q, \
+ smb_mapjoin_11.q, \
+ smb_mapjoin_12.q, \
+ smb_mapjoin_13.q, \
+ smb_mapjoin_14.q, \
+ smb_mapjoin_15.q, \
+ smb_mapjoin_16.q, \
+ smb_mapjoin_17.q, \
+ smb_mapjoin_18.q, \
+ smb_mapjoin_19.q, \
+ smb_mapjoin_2.q, \
+ smb_mapjoin_20.q, \
+ smb_mapjoin_21.q, \
+ smb_mapjoin_22.q, \
+ smb_mapjoin_25.q, \
+ smb_mapjoin_3.q, \
+ smb_mapjoin_4.q, \
+ smb_mapjoin_5.q, \
+ smb_mapjoin_6.q, \
+ smb_mapjoin_7.q, \
+ smb_mapjoin_8.q, \
+ smb_mapjoin_9.q, \
+ sort.q, \
+ stats0.q, \
+ stats1.q, \
+ stats10.q, \
+ stats12.q, \
+ stats13.q, \
+ stats14.q, \
+ stats15.q, \
+ stats16.q, \
+ stats18.q, \
+ stats2.q, \
+ stats3.q, \
+ stats5.q, \
+ stats6.q, \
+ stats7.q, \
+ stats8.q, \
+ stats9.q, \
+ stats_noscan_1.q, \
+ stats_noscan_2.q, \
+ stats_only_null.q, \
+ stats_partscan_1_23.q, \
+ statsfs.q, \
+ subquery_exists.q, \
+ subquery_in.q, \
+ subquery_multiinsert.q, \
+ table_access_keys_stats.q, \
+ temp_table.q, \
+ temp_table_gb1.q, \
+ temp_table_join1.q, \
+ tez_join_tests.q, \
+ tez_joins_explain.q, \
+ timestamp_1.q, \
+ timestamp_2.q, \
+ timestamp_3.q, \
+ timestamp_comparison.q, \
+ timestamp_lazy.q, \
+ timestamp_null.q, \
+ timestamp_udf.q, \
+ transform2.q, \
+ transform_ppr1.q, \
+ transform_ppr2.q, \
+ udaf_collect_set.q, \
+ udf_example_add.q, \
+ udf_in_file.q, \
+ udf_max.q, \
+ udf_min.q, \
+ udf_percentile.q, \
+ union.q, \
+ union10.q, \
+ union11.q, \
+ union12.q, \
+ union13.q, \
+ union14.q, \
+ union15.q, \
+ union16.q, \
+ union17.q, \
+ union18.q, \
+ union19.q, \
+ union2.q, \
+ union20.q, \
+ union21.q, \
+ union22.q, \
+ union23.q, \
+ union24.q, \
+ union25.q, \
+ union26.q, \
+ union27.q, \
+ union28.q, \
+ union29.q, \
+ union3.q, \
+ union30.q, \
+ union31.q, \
+ union32.q, \
+ union33.q, \
+ union34.q, \
+ union4.q, \
+ union5.q, \
+ union6.q, \
+ union7.q, \
+ union8.q, \
+ union9.q, \
+ union_date.q, \
+ union_date_trim.q, \
+ union_lateralview.q, \
+ union_null.q, \
+ union_ppr.q, \
+ union_remove_1.q, \
+ union_remove_10.q, \
+ union_remove_11.q, \
+ union_remove_12.q, \
+ union_remove_13.q, \
+ union_remove_14.q, \
+ union_remove_15.q, \
+ union_remove_16.q, \
+ union_remove_17.q, \
+ union_remove_18.q, \
+ union_remove_19.q, \
+ union_remove_2.q, \
+ union_remove_20.q, \
+ union_remove_21.q, \
+ union_remove_22.q, \
+ union_remove_23.q, \
+ union_remove_24.q, \
+ union_remove_25.q, \
+ union_remove_3.q, \
+ union_remove_4.q, \
+ union_remove_5.q, \
+ union_remove_6.q, \
+ union_remove_6_subq.q, \
+ union_remove_7.q, \
+ union_remove_8.q, \
+ union_remove_9.q, \
+ union_script.q, \
+ union_top_level.q, \
+ union_view.q, \
+ uniquejoin.q, \
+ varchar_join1.q, \
+ vector_between_in.q, \
+ vector_cast_constant.q, \
+ vector_char_4.q, \
+ vector_count_distinct.q, \
+ vector_data_types.q, \
+ vector_decimal_aggregate.q, \
+ vector_decimal_mapjoin.q, \
+ vector_distinct_2.q, \
+ vector_elt.q, \
+ vector_groupby_3.q, \
+ vector_left_outer_join.q, \
+ vector_mapjoin_reduce.q, \
+ vector_orderby_5.q, \
+ vector_string_concat.q, \
+ vector_varchar_4.q, \
+ vectorization_0.q, \
+ vectorization_1.q, \
+ vectorization_10.q, \
+ vectorization_11.q, \
+ vectorization_12.q, \
+ vectorization_13.q, \
+ vectorization_14.q, \
+ vectorization_15.q, \
+ vectorization_16.q, \
+ vectorization_17.q, \
+ vectorization_2.q, \
+ vectorization_3.q, \
+ vectorization_4.q, \
+ vectorization_5.q, \
+ vectorization_6.q, \
+ vectorization_9.q, \
+ vectorization_decimal_date.q, \
+ vectorization_div0.q, \
+ vectorization_nested_udf.q, \
+ vectorization_not.q, \
+ vectorization_part.q, \
+ vectorization_part_project.q, \
+ vectorization_pushdown.q, \
+ vectorization_short_regress.q, \
+ vectorized_case.q, \
+ vectorized_mapjoin.q, \
+ vectorized_math_funcs.q, \
+ vectorized_nested_mapjoin.q, \
+ vectorized_ptf.q, \
+ vectorized_rcfile_columnar.q, \
+ vectorized_shufflejoin.q, \
+ vectorized_string_funcs.q, \
+ vectorized_timestamp_funcs.q, \
+ windowing.q
+
+# Unlike "spark.query.files" above, these tests only run
+# under Spark engine.
+spark.only.query.files=spark_dynamic_partition_pruning.q,\
+ spark_dynamic_partition_pruning_2.q,\
+ spark_vectorized_dynamic_partition_pruning.q
+
+miniSparkOnYarn.query.files=auto_sortmerge_join_16.q,\
+ bucket4.q,\
+ bucket5.q,\
+ bucket6.q,\
+ bucketizedhiveinputformat.q,\
+ bucketmapjoin6.q,\
+ bucketmapjoin7.q,\
+ constprog_partitioner.q,\
+ constprog_semijoin.q,\
+ disable_merge_for_bucketing.q,\
+ empty_dir_in_table.q,\
+ external_table_with_space_in_location_path.q,\
+ file_with_header_footer.q,\
+ gen_udf_example_add10.q,\
+ import_exported_table.q,\
+ index_bitmap3.q,\
+ index_bitmap_auto.q,\
+ infer_bucket_sort_bucketed_table.q,\
+ infer_bucket_sort_map_operators.q,\
+ infer_bucket_sort_merge.q,\
+ infer_bucket_sort_num_buckets.q,\
+ infer_bucket_sort_reducers_power_two.q,\
+ input16_cc.q,\
+ insert_overwrite_directory2.q,\
+ leftsemijoin_mr.q,\
+ list_bucket_dml_10.q,\
+ load_fs2.q,\
+ load_hdfs_file_with_space_in_the_name.q,\
+ orc_merge1.q,\
+ orc_merge2.q,\
+ orc_merge3.q,\
+ orc_merge4.q,\
+ orc_merge5.q,\
+ orc_merge6.q,\
+ orc_merge7.q,\
+ orc_merge8.q,\
+ orc_merge9.q,\
+ orc_merge_diff_fs.q,\
+ orc_merge_incompat1.q,\
+ orc_merge_incompat2.q,\
+ parallel_orderby.q,\
+ quotedid_smb.q,\
+ reduce_deduplicate.q,\
+ remote_script.q,\
+ root_dir_external_table.q,\
+ schemeAuthority.q,\
+ schemeAuthority2.q,\
+ scriptfile1.q,\
+ scriptfile1_win.q,\
+ temp_table_external.q,\
+ truncate_column_buckets.q,\
+ uber_reduce.q,\
+ vector_inner_join.q,\
+ vector_outer_join0.q,\
+ vector_outer_join1.q,\
+ vector_outer_join2.q,\
+ vector_outer_join3.q,\
+ vector_outer_join4.q,\
+ vector_outer_join5.q
+
+# These tests are removed from miniSparkOnYarn.query.files
+# ql_rewrite_gbtoidx.q,\
+# ql_rewrite_gbtoidx_cbo_1.q,\
+# smb_mapjoin_8.q,\
+
+
+spark.query.negative.files=groupby2_map_skew_multi_distinct.q,\
+ groupby2_multi_distinct.q,\
+ groupby3_map_skew_multi_distinct.q,\
+ groupby3_multi_distinct.q,\
+ groupby_grouping_sets7.q
diff --git pom.xml pom.xml
index 2fb78cd..261f993 100644
--- pom.xml
+++ pom.xml
@@ -175,7 +175,7 @@
1.0.1
1.7.10
4.0.4
-    <tez.version>0.8.4</tez.version>
+    <tez.version>0.9.0-SNAPSHOT</tez.version>
0.90.2-incubating
2.2.0
1.6.0
diff --git ql/src/java/org/apache/hadoop/hive/ql/Context.java.orig ql/src/java/org/apache/hadoop/hive/ql/Context.java.orig
new file mode 100644
index 0000000..4667f68
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/Context.java.orig
@@ -0,0 +1,829 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql;
+
+import java.io.DataInput;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.antlr.runtime.TokenRewriteStream;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.common.BlobStorageUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.TaskRunner;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager.Heartbeater;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLock;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj;
+import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
+import org.apache.hadoop.hive.ql.lockmgr.LockException;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.util.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Context for Semantic Analyzers. Usage: not reusable - construct a new one for
+ * each query; call clear() at end of use to remove temporary folders
+ */
+public class Context {
+ private boolean isHDFSCleanup;
+ private Path resFile;
+ private Path resDir;
+ private FileSystem resFs;
+ private static final Logger LOG = LoggerFactory.getLogger("hive.ql.Context");
+ private Path[] resDirPaths;
+ private int resDirFilesNum;
+ boolean initialized;
+ String originalTracker = null;
+ private final CompilationOpContext opContext;
+  private final Map<String, ContentSummary> pathToCS = new ConcurrentHashMap<String, ContentSummary>();
+
+ // scratch path to use for all non-local (ie. hdfs) file system tmp folders
+ private final Path nonLocalScratchPath;
+
+ // scratch directory to use for local file system tmp folders
+ private final String localScratchDir;
+
+ // the permission to scratch directory (local and hdfs)
+ private final String scratchDirPermission;
+
+ // Keeps track of scratch directories created for different scheme/authority
+  private final Map<String, Path> fsScratchDirs = new HashMap<String, Path>();
+
+ private final Configuration conf;
+ protected int pathid = 10000;
+ protected boolean explain = false;
+ protected String cboInfo;
+ protected boolean cboSucceeded;
+ protected boolean explainLogical = false;
+ protected String cmd = "";
+ // number of previous attempts
+ protected int tryCount = 0;
+ private TokenRewriteStream tokenRewriteStream;
+
+ private String executionId;
+
+ // List of Locks for this query
+  protected List<HiveLock> hiveLocks;
+
+ // Transaction manager for this query
+ protected HiveTxnManager hiveTxnManager;
+
+ // Used to track what type of acid operation (insert, update, or delete) we are doing. Useful
+ // since we want to change where bucket columns are accessed in some operators and
+ // optimizations when doing updates and deletes.
+ private AcidUtils.Operation acidOperation = AcidUtils.Operation.NOT_ACID;
+
+ private boolean needLockMgr;
+
+ private AtomicInteger sequencer = new AtomicInteger();
+
+  private final Map<String, Table> cteTables = new HashMap<String, Table>();
+
+ // Keep track of the mapping from load table desc to the output and the lock
+  private final Map<LoadTableDesc, WriteEntity> loadTableOutputMap =
+      new HashMap<LoadTableDesc, WriteEntity>();
+  private final Map<WriteEntity, List<HiveLockObj>> outputLockObjects =
+      new HashMap<WriteEntity, List<HiveLockObj>>();
+
+ private final String stagingDir;
+
+ private Heartbeater heartbeater;
+
+ private boolean skipTableMasking;
+
+ public Context(Configuration conf) throws IOException {
+ this(conf, generateExecutionId());
+ }
+
+ /**
+ * Create a Context with a given executionId. ExecutionId, together with
+ * user name and conf, will determine the temporary directory locations.
+ */
+ public Context(Configuration conf, String executionId) {
+ this.conf = conf;
+ this.executionId = executionId;
+
+ // local & non-local tmp location is configurable. however it is the same across
+ // all external file systems
+ nonLocalScratchPath = new Path(SessionState.getHDFSSessionPath(conf), executionId);
+ localScratchDir = new Path(SessionState.getLocalSessionPath(conf), executionId).toUri().getPath();
+ scratchDirPermission = HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIRPERMISSION);
+ stagingDir = HiveConf.getVar(conf, HiveConf.ConfVars.STAGINGDIR);
+ opContext = new CompilationOpContext();
+ }
+
+
+  public Map<LoadTableDesc, WriteEntity> getLoadTableOutputMap() {
+ return loadTableOutputMap;
+ }
+
+  public Map<WriteEntity, List<HiveLockObj>> getOutputLockObjects() {
+ return outputLockObjects;
+ }
+
+ /**
+ * Set the context on whether the current query is an explain query.
+ * @param value true if the query is an explain query, false if not
+ */
+ public void setExplain(boolean value) {
+ explain = value;
+ }
+
+ /**
+ * Find whether the current query is an explain query
+ * @return true if the query is an explain query, false if not
+ */
+ public boolean getExplain() {
+ return explain;
+ }
+
+ /**
+ * Find whether the current query is a logical explain query
+ */
+ public boolean getExplainLogical() {
+ return explainLogical;
+ }
+
+ /**
+ * Set the context on whether the current query is a logical
+ * explain query.
+ */
+ public void setExplainLogical(boolean explainLogical) {
+ this.explainLogical = explainLogical;
+ }
+
+ /**
+ * Set the original query command.
+ * @param cmd the original query command string
+ */
+ public void setCmd(String cmd) {
+ this.cmd = cmd;
+ }
+
+ /**
+ * Find the original query command.
+ * @return the original query command string
+ */
+ public String getCmd () {
+ return cmd;
+ }
+
+ /**
+ * Gets a temporary staging directory related to a path.
+ * If a path already contains a staging directory, then returns the current directory; otherwise
+ * create the directory if needed.
+ *
+ * @param inputPath URI of the temporary directory
+ * @param mkdir Create the directory if True.
+ * @return A temporary path.
+ */
+ private Path getStagingDir(Path inputPath, boolean mkdir) {
+ final URI inputPathUri = inputPath.toUri();
+ final String inputPathName = inputPathUri.getPath();
+ final String fileSystem = inputPathUri.getScheme() + ":" + inputPathUri.getAuthority();
+ final FileSystem fs;
+
+ try {
+ fs = inputPath.getFileSystem(conf);
+ } catch (IOException e) {
+ throw new IllegalStateException("Error getting FileSystem for " + inputPath + ": "+ e, e);
+ }
+
+ String stagingPathName;
+ if (inputPathName.indexOf(stagingDir) == -1) {
+ stagingPathName = new Path(inputPathName, stagingDir).toString();
+ } else {
+ stagingPathName = inputPathName.substring(0, inputPathName.indexOf(stagingDir) + stagingDir.length());
+ }
+
+ final String key = fileSystem + "-" + stagingPathName + "-" + TaskRunner.getTaskRunnerID();
+
+ Path dir = fsScratchDirs.get(key);
+ if (dir == null) {
+ // Append task specific info to stagingPathName, instead of creating a sub-directory.
+ // This way we don't have to worry about deleting the stagingPathName separately at
+ // end of query execution.
+ dir = fs.makeQualified(new Path(stagingPathName + "_" + this.executionId + "-" + TaskRunner.getTaskRunnerID()));
+
+ LOG.debug("Created staging dir = " + dir + " for path = " + inputPath);
+
+ if (mkdir) {
+ try {
+ boolean inheritPerms = HiveConf.getBoolVar(conf,
+ HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);
+ if (!FileUtils.mkdir(fs, dir, inheritPerms, conf)) {
+ throw new IllegalStateException("Cannot create staging directory '" + dir.toString() + "'");
+ }
+
+ if (isHDFSCleanup) {
+ fs.deleteOnExit(dir);
+ }
+ } catch (IOException e) {
+ throw new RuntimeException("Cannot create staging directory '" + dir.toString() + "': " + e.getMessage(), e);
+ }
+ }
+
+ fsScratchDirs.put(key, dir);
+ }
+
+ return dir;
+ }
+
+ /**
+ * Get a tmp directory on specified URI
+ *
+ * @param scheme Scheme of the target FS
+ * @param authority Authority of the target FS
+ * @param mkdir create the directory if true
+ * @param scratchDir path of tmp directory
+ */
+ private Path getScratchDir(String scheme, String authority,
+ boolean mkdir, String scratchDir) {
+
+ String fileSystem = scheme + ":" + authority;
+ Path dir = fsScratchDirs.get(fileSystem + "-" + TaskRunner.getTaskRunnerID());
+
+ if (dir == null) {
+ Path dirPath = new Path(scheme, authority,
+ scratchDir + "-" + TaskRunner.getTaskRunnerID());
+ if (mkdir) {
+ try {
+ FileSystem fs = dirPath.getFileSystem(conf);
+ dirPath = new Path(fs.makeQualified(dirPath).toString());
+ FsPermission fsPermission = new FsPermission(scratchDirPermission);
+
+ if (!fs.mkdirs(dirPath, fsPermission)) {
+ throw new RuntimeException("Cannot make directory: "
+ + dirPath.toString());
+ }
+ if (isHDFSCleanup) {
+ fs.deleteOnExit(dirPath);
+ }
+ } catch (IOException e) {
+ throw new RuntimeException (e);
+ }
+ }
+ dir = dirPath;
+ fsScratchDirs.put(fileSystem + "-" + TaskRunner.getTaskRunnerID(), dir);
+
+ }
+
+ return dir;
+ }
+
+
+ /**
+ * Create a local scratch directory on demand and return it.
+ */
+ public Path getLocalScratchDir(boolean mkdir) {
+ try {
+ FileSystem fs = FileSystem.getLocal(conf);
+ URI uri = fs.getUri();
+ return getScratchDir(uri.getScheme(), uri.getAuthority(),
+ mkdir, localScratchDir);
+ } catch (IOException e) {
+ throw new RuntimeException (e);
+ }
+ }
+
+
+ /**
+ * Create a map-reduce scratch directory on demand and return it.
+ *
+ */
+ public Path getMRScratchDir() {
+
+ // if we are executing entirely on the client side - then
+ // just (re)use the local scratch directory
+ if(isLocalOnlyExecutionMode()) {
+ return getLocalScratchDir(!explain);
+ }
+
+ try {
+ Path dir = FileUtils.makeQualified(nonLocalScratchPath, conf);
+ URI uri = dir.toUri();
+
+ Path newScratchDir = getScratchDir(uri.getScheme(), uri.getAuthority(),
+ !explain, uri.getPath());
+ LOG.info("New scratch dir is " + newScratchDir);
+ return newScratchDir;
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ } catch (IllegalArgumentException e) {
+ throw new RuntimeException("Error while making MR scratch "
+ + "directory - check filesystem config (" + e.getCause() + ")", e);
+ }
+ }
+
+ /**
+   * Create a temporary directory depending on the path specified.
+ * - If path is an Object store filesystem, then use the default MR scratch directory (HDFS)
+ * - If path is on HDFS, then create a staging directory inside the path
+ *
+ * @param path Path used to verify the Filesystem to use for temporary directory
+ * @return A path to the new temporary directory
+ */
+ public Path getTempDirForPath(Path path) {
+ boolean isLocal = isPathLocal(path);
+ if ((BlobStorageUtils.isBlobStoragePath(conf, path) && !BlobStorageUtils.isBlobStorageAsScratchDir(conf))
+ || isLocal) {
+ // For better write performance, we use HDFS for temporary data when object store is used.
+ // Note that the scratch directory configuration variable must use HDFS or any other non-blobstorage system
+ // to take advantage of this performance.
+ return getMRTmpPath();
+ } else {
+ return getExtTmpPathRelTo(path);
+ }
+ }
+
+ /*
+ * Checks if the path is for the local filesystem or not
+ */
+ private boolean isPathLocal(Path path) {
+ boolean isLocal = false;
+ if (path != null) {
+ String scheme = path.toUri().getScheme();
+ if (scheme != null) {
+ isLocal = scheme.equals(Utilities.HADOOP_LOCAL_FS_SCHEME);
+ }
+ }
+ return isLocal;
+ }
+
+ private Path getExternalScratchDir(URI extURI) {
+ return getStagingDir(new Path(extURI.getScheme(), extURI.getAuthority(), extURI.getPath()), !explain);
+ }
+
+ /**
+ * Remove any created scratch directories.
+ */
+ public void removeScratchDir() {
+    for (Map.Entry<String, Path> entry : fsScratchDirs.entrySet()) {
+ try {
+ Path p = entry.getValue();
+ FileSystem fs = p.getFileSystem(conf);
+ fs.delete(p, true);
+ fs.cancelDeleteOnExit(p);
+ } catch (Exception e) {
+ LOG.warn("Error Removing Scratch: "
+ + StringUtils.stringifyException(e));
+ }
+ }
+ fsScratchDirs.clear();
+ }
+
+ /**
+ * Remove any created directories for CTEs.
+ */
+ public void removeMaterializedCTEs() {
+ // clean CTE tables
+ for (Table materializedTable : cteTables.values()) {
+ Path location = materializedTable.getDataLocation();
+ try {
+ FileSystem fs = location.getFileSystem(conf);
+ boolean status = fs.delete(location, true);
+ LOG.info("Removed " + location + " for materialized "
+ + materializedTable.getTableName() + ", status=" + status);
+ } catch (IOException e) {
+ // ignore
+ LOG.warn("Error removing " + location + " for materialized " + materializedTable.getTableName() +
+ ": " + StringUtils.stringifyException(e));
+ }
+ }
+ cteTables.clear();
+ }
+
+ private String nextPathId() {
+ return Integer.toString(pathid++);
+ }
+
+
+ private static final String MR_PREFIX = "-mr-";
+ private static final String EXT_PREFIX = "-ext-";
+ private static final String LOCAL_PREFIX = "-local-";
+
+ /**
+ * Check if path is for intermediate data
+ * @return true if a uri is a temporary uri for map-reduce intermediate data,
+ * false otherwise
+ */
+ public boolean isMRTmpFileURI(String uriStr) {
+ return (uriStr.indexOf(executionId) != -1) &&
+ (uriStr.indexOf(MR_PREFIX) != -1);
+ }
+
+ public Path getMRTmpPath(URI uri) {
+ return new Path(getStagingDir(new Path(uri), !explain), MR_PREFIX + nextPathId());
+ }
+
+ /**
+ * Get a path to store map-reduce intermediate data in.
+ *
+ * @return next available path for map-red intermediate data
+ */
+ public Path getMRTmpPath() {
+ return new Path(getMRScratchDir(), MR_PREFIX +
+ nextPathId());
+ }
+
+ /**
+ * Get a tmp path on local host to store intermediate data.
+ *
+ * @return next available tmp path on local fs
+ */
+ public Path getLocalTmpPath() {
+ return new Path(getLocalScratchDir(true), LOCAL_PREFIX + nextPathId());
+ }
+
+ /**
+ * Get a path to store tmp data destined for external Path.
+ *
+ * @param path external Path to which the tmp data has to be eventually moved
+ * @return next available tmp path on the file system corresponding extURI
+ */
+ public Path getExternalTmpPath(Path path) {
+ URI extURI = path.toUri();
+ if (extURI.getScheme().equals("viewfs")) {
+ // if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/..
+ // to final /user/hive/warehouse/ will fail later, so instead pick tmp dir
+ // on same namespace as tbl dir.
+ return getExtTmpPathRelTo(path.getParent());
+ }
+ return new Path(getExternalScratchDir(extURI), EXT_PREFIX +
+ nextPathId());
+ }
+
+ /**
+   * This is similar to getExternalTmpPath(), the difference being that this method returns a temp
+   * path within the passed-in URI, whereas getExternalTmpPath() ignores the passed-in path and
+   * returns a temp path within /tmp
+ */
+ public Path getExtTmpPathRelTo(Path path) {
+ return new Path(getStagingDir(path, !explain), EXT_PREFIX + nextPathId());
+ }
+
+ /**
+ * @return the resFile
+ */
+ public Path getResFile() {
+ return resFile;
+ }
+
+ /**
+ * @param resFile
+ * the resFile to set
+ */
+ public void setResFile(Path resFile) {
+ this.resFile = resFile;
+ resDir = null;
+ resDirPaths = null;
+ resDirFilesNum = 0;
+ }
+
+ /**
+ * @return the resDir
+ */
+ public Path getResDir() {
+ return resDir;
+ }
+
+ /**
+ * @param resDir
+ * the resDir to set
+ */
+ public void setResDir(Path resDir) {
+ this.resDir = resDir;
+ resFile = null;
+
+ resDirFilesNum = 0;
+ resDirPaths = null;
+ }
+
+ public void clear() throws IOException {
+ if (resDir != null) {
+ try {
+ FileSystem fs = resDir.getFileSystem(conf);
+ fs.delete(resDir, true);
+ } catch (IOException e) {
+ LOG.info("Context clear error: " + StringUtils.stringifyException(e));
+ }
+ }
+
+ if (resFile != null) {
+ try {
+ FileSystem fs = resFile.getFileSystem(conf);
+ fs.delete(resFile, false);
+ } catch (IOException e) {
+ LOG.info("Context clear error: " + StringUtils.stringifyException(e));
+ }
+ }
+ removeMaterializedCTEs();
+ removeScratchDir();
+ originalTracker = null;
+ setNeedLockMgr(false);
+ }
+
+ public DataInput getStream() {
+ try {
+ if (!initialized) {
+ initialized = true;
+ if ((resFile == null) && (resDir == null)) {
+ return null;
+ }
+
+ if (resFile != null) {
+ return resFile.getFileSystem(conf).open(resFile);
+ }
+
+ resFs = resDir.getFileSystem(conf);
+ FileStatus status = resFs.getFileStatus(resDir);
+ assert status.isDir();
+ FileStatus[] resDirFS = resFs.globStatus(new Path(resDir + "/*"), FileUtils.HIDDEN_FILES_PATH_FILTER);
+ resDirPaths = new Path[resDirFS.length];
+ int pos = 0;
+ for (FileStatus resFS : resDirFS) {
+ if (!resFS.isDir()) {
+ resDirPaths[pos++] = resFS.getPath();
+ }
+ }
+ if (pos == 0) {
+ return null;
+ }
+
+ return resFs.open(resDirPaths[resDirFilesNum++]);
+ } else {
+ return getNextStream();
+ }
+ } catch (FileNotFoundException e) {
+ LOG.info("getStream error: " + StringUtils.stringifyException(e));
+ return null;
+ } catch (IOException e) {
+ LOG.info("getStream error: " + StringUtils.stringifyException(e));
+ return null;
+ }
+ }
+
+ private DataInput getNextStream() {
+ try {
+ if (resDir != null && resDirFilesNum < resDirPaths.length
+ && (resDirPaths[resDirFilesNum] != null)) {
+ return resFs.open(resDirPaths[resDirFilesNum++]);
+ }
+ } catch (FileNotFoundException e) {
+ LOG.info("getNextStream error: " + StringUtils.stringifyException(e));
+ return null;
+ } catch (IOException e) {
+ LOG.info("getNextStream error: " + StringUtils.stringifyException(e));
+ return null;
+ }
+
+ return null;
+ }
+
+ public void resetStream() {
+ if (initialized) {
+ resDirFilesNum = 0;
+ initialized = false;
+ }
+ }
+
+ /**
+ * Little abbreviation for StringUtils.
+ */
+ private static boolean strEquals(String str1, String str2) {
+ return org.apache.commons.lang.StringUtils.equals(str1, str2);
+ }
+
+ /**
+ * Set the token rewrite stream being used to parse the current top-level SQL
+ * statement. Note that this should not be used for other parsing
+ * activities; for example, when we encounter a reference to a view, we switch
+ * to a new stream for parsing the stored view definition from the catalog,
+ * but we don't clobber the top-level stream in the context.
+ *
+ * @param tokenRewriteStream
+ * the stream being used
+ */
+ public void setTokenRewriteStream(TokenRewriteStream tokenRewriteStream) {
+ assert (this.tokenRewriteStream == null);
+ this.tokenRewriteStream = tokenRewriteStream;
+ }
+
+ /**
+ * @return the token rewrite stream being used to parse the current top-level
+ * SQL statement, or null if it isn't available (e.g. for parser
+ * tests)
+ */
+ public TokenRewriteStream getTokenRewriteStream() {
+ return tokenRewriteStream;
+ }
+
+ /**
+ * Generate a unique executionId. An executionId, together with user name and
+ * the configuration, will determine the temporary locations of all intermediate
+ * files.
+ *
+ * In the future, users can use the executionId to resume a query.
+ */
+ public static String generateExecutionId() {
+ Random rand = new Random();
+ SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd_HH-mm-ss_SSS");
+ String executionId = "hive_" + format.format(new Date()) + "_"
+ + Math.abs(rand.nextLong());
+ return executionId;
+ }
+
+ /**
+   * Does Hive want to run tasks entirely on the local machine
+ * (where the query is being compiled)?
+ *
+ * Today this translates into running hadoop jobs locally
+ */
+ public boolean isLocalOnlyExecutionMode() {
+ // Always allow spark to run in a cluster mode. Without this, depending on
+ // user's local hadoop settings, true may be returned, which causes plan to be
+ // stored in local path.
+ if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
+ return false;
+ }
+
+ return ShimLoader.getHadoopShims().isLocalMode(conf);
+ }
+
+  public List<HiveLock> getHiveLocks() {
+ return hiveLocks;
+ }
+
+  public void setHiveLocks(List<HiveLock> hiveLocks) {
+ this.hiveLocks = hiveLocks;
+ }
+
+ public HiveTxnManager getHiveTxnManager() {
+ return hiveTxnManager;
+ }
+
+ public void setHiveTxnManager(HiveTxnManager txnMgr) {
+ hiveTxnManager = txnMgr;
+ }
+
+ public void setOriginalTracker(String originalTracker) {
+ this.originalTracker = originalTracker;
+ }
+
+ public void restoreOriginalTracker() {
+ if (originalTracker != null) {
+ ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, originalTracker);
+ originalTracker = null;
+ }
+ }
+
+ public void addCS(String path, ContentSummary cs) {
+ pathToCS.put(path, cs);
+ }
+
+ public ContentSummary getCS(Path path) {
+ return getCS(path.toString());
+ }
+
+ public ContentSummary getCS(String path) {
+ return pathToCS.get(path);
+ }
+
+  public Map<String, ContentSummary> getPathToCS() {
+ return pathToCS;
+ }
+
+ public Configuration getConf() {
+ return conf;
+ }
+
+ /**
+ * @return the isHDFSCleanup
+ */
+ public boolean isHDFSCleanup() {
+ return isHDFSCleanup;
+ }
+
+ /**
+ * @param isHDFSCleanup the isHDFSCleanup to set
+ */
+ public void setHDFSCleanup(boolean isHDFSCleanup) {
+ this.isHDFSCleanup = isHDFSCleanup;
+ }
+
+ public boolean isNeedLockMgr() {
+ return needLockMgr;
+ }
+
+ public void setNeedLockMgr(boolean needLockMgr) {
+ this.needLockMgr = needLockMgr;
+ }
+
+ public int getTryCount() {
+ return tryCount;
+ }
+
+ public void setTryCount(int tryCount) {
+ this.tryCount = tryCount;
+ }
+
+ public void setAcidOperation(AcidUtils.Operation op) {
+ acidOperation = op;
+ }
+
+ public AcidUtils.Operation getAcidOperation() {
+ return acidOperation;
+ }
+
+ public String getCboInfo() {
+ return cboInfo;
+ }
+
+ public void setCboInfo(String cboInfo) {
+ this.cboInfo = cboInfo;
+ }
+
+ public boolean isCboSucceeded() {
+ return cboSucceeded;
+ }
+
+ public void setCboSucceeded(boolean cboSucceeded) {
+ this.cboSucceeded = cboSucceeded;
+ }
+
+ public Table getMaterializedTable(String cteName) {
+ return cteTables.get(cteName);
+ }
+
+ public void addMaterializedTable(String cteName, Table table) {
+ cteTables.put(cteName, table);
+ }
+
+ public AtomicInteger getSequencer() {
+ return sequencer;
+ }
+
+ public CompilationOpContext getOpContext() {
+ return opContext;
+ }
+
+ public Heartbeater getHeartbeater() {
+ return heartbeater;
+ }
+
+ public void setHeartbeater(Heartbeater heartbeater) {
+ this.heartbeater = heartbeater;
+ }
+
+ public void checkHeartbeaterLockException() throws LockException {
+ if (getHeartbeater() != null && getHeartbeater().getLockException() != null) {
+ throw getHeartbeater().getLockException();
+ }
+ }
+
+ public boolean isSkipTableMasking() {
+ return skipTableMasking;
+ }
+
+ public void setSkipTableMasking(boolean skipTableMasking) {
+ this.skipTableMasking = skipTableMasking;
+ }
+}
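A side note on the executionId helper shown above: it just combines a timestamp with a random long under a "hive_" prefix, and that string later anchors the temporary locations of intermediate files. The following is a minimal, hypothetical stand-alone sketch of the same pattern; only the "hive_" prefix and the SimpleDateFormat pattern are taken from the method above, the class and method names are invented for illustration.

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Random;

// Hypothetical sketch of the executionId format produced by Context.generateExecutionId().
public class ExecutionIdSketch {
  public static String newExecutionId() {
    Random rand = new Random();
    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd_HH-mm-ss_SSS");
    // e.g. "hive_2017-08-14_10-22-33_123_4567890123456789"
    return "hive_" + format.format(new Date()) + "_" + Math.abs(rand.nextLong());
  }

  public static void main(String[] args) {
    System.out.println(newExecutionId());
  }
}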
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
index eefa1d9..d60233f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
@@ -43,6 +43,8 @@
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang.StringUtils;
+import org.apache.tez.runtime.library.cartesianproduct.CartesianProductConfig;
+import org.apache.tez.runtime.library.cartesianproduct.CartesianProductEdgeManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
@@ -132,6 +134,7 @@
import org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig;
import org.apache.tez.runtime.library.conf.UnorderedPartitionedKVEdgeConfig;
import org.apache.tez.runtime.library.input.ConcatenatedMergedKeyValueInput;
+import org.apache.tez.runtime.library.cartesianproduct.CartesianProductVertexManager;
/**
* DagUtils. DagUtils is a collection of helper methods to convert
@@ -261,7 +264,7 @@ private JobConf initializeVertexConf(JobConf baseConf, Context context, MapWork
*/
@SuppressWarnings("rawtypes")
public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w,
- TezEdgeProperty edgeProp, VertexType vertexType)
+ TezEdgeProperty edgeProp, VertexType vertexType, BaseWork work, TezWork tezWork)
throws IOException {
Class mergeInputClass;
@@ -292,6 +295,9 @@ public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w,
mergeInputClass = ConcatenatedMergedKeyValueInput.class;
break;
+ case XPROD_EDGE:
+ // fall through
+
case SIMPLE_EDGE:
setupAutoReducerParallelism(edgeProp, w);
// fall through
@@ -301,7 +307,7 @@ public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w,
break;
}
- return GroupInputEdge.create(group, w, createEdgeProperty(edgeProp, vConf),
+ return GroupInputEdge.create(group, w, createEdgeProperty(w, edgeProp, vConf, work, tezWork),
InputDescriptor.create(mergeInputClass.getName()));
}
@@ -315,7 +321,7 @@ public GroupInputEdge createEdge(VertexGroup group, JobConf vConf, Vertex w,
* @return
*/
public Edge createEdge(JobConf vConf, Vertex v, Vertex w, TezEdgeProperty edgeProp,
- VertexType vertexType)
+ VertexType vertexType, BaseWork work, TezWork tezWork)
throws IOException {
switch(edgeProp.getEdgeType()) {
@@ -332,6 +338,9 @@ public Edge createEdge(JobConf vConf, Vertex v, Vertex w, TezEdgeProperty edgePr
w.setVertexManagerPlugin(desc);
break;
}
+ case XPROD_EDGE:
+ break;
+
case SIMPLE_EDGE: {
setupAutoReducerParallelism(edgeProp, w);
break;
@@ -340,14 +349,15 @@ public Edge createEdge(JobConf vConf, Vertex v, Vertex w, TezEdgeProperty edgePr
// nothing
}
- return Edge.create(v, w, createEdgeProperty(edgeProp, vConf));
+ return Edge.create(v, w, createEdgeProperty(w, edgeProp, vConf, work, tezWork));
}
/*
* Helper function to create an edge property from an edge type.
*/
- private EdgeProperty createEdgeProperty(TezEdgeProperty edgeProp, Configuration conf)
- throws IOException {
+ private EdgeProperty createEdgeProperty(Vertex w, TezEdgeProperty edgeProp,
+ Configuration conf, BaseWork work, TezWork tezWork)
+ throws IOException {
MRHelpers.translateMRConfToTez(conf);
String keyClass = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS);
String valClass = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS);
@@ -392,7 +402,22 @@ private EdgeProperty createEdgeProperty(TezEdgeProperty edgeProp, Configuration
.setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null)
.build();
return et3Conf.createDefaultEdgeProperty();
+ case XPROD_EDGE:
+ EdgeManagerPluginDescriptor edgeManagerDescriptor =
+ EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName());
+ List<String> crossProductSources = new ArrayList<>();
+ for (BaseWork parentWork : tezWork.getParents(work)) {
+ if (tezWork.getEdgeType(parentWork, work) == EdgeType.XPROD_EDGE) {
+ crossProductSources.add(parentWork.getName());
+ }
+ }
+ CartesianProductConfig cpConfig = new CartesianProductConfig(crossProductSources);
+ edgeManagerDescriptor.setUserPayload(cpConfig.toUserPayload(null));
+ UnorderedKVEdgeConfig cpEdgeConf =
+ UnorderedKVEdgeConfig.newBuilder(keyClass, valClass).build();
+ return cpEdgeConf.createDefaultCustomEdgeProperty(edgeManagerDescriptor);
case SIMPLE_EDGE:
+ // fallthrough
default:
assert partitionerClassName != null;
partitionerConf = createPartitionerConf(partitionerClassName, conf);
@@ -1154,6 +1179,20 @@ public Vertex createVertex(JobConf conf, BaseWork work,
} else if (work instanceof MergeJoinWork) {
v = createVertex(conf, (MergeJoinWork) work, appJarLr, additionalLr, fileSystem, scratchDir,
ctx, vertexType);
+ // set VertexManagerPlugin if it's a cross product destination vertex
+ List<String> crossProductSources = new ArrayList<>();
+ for (BaseWork parentWork : tezWork.getParents(work)) {
+ if (tezWork.getEdgeType(parentWork, work) == EdgeType.XPROD_EDGE) {
+ crossProductSources.add(parentWork.getName());
+ }
+ }
+
+ if (!crossProductSources.isEmpty()) {
+ CartesianProductConfig cpConfig = new CartesianProductConfig(crossProductSources);
+ v.setVertexManagerPlugin(
+ VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
+ .setUserPayload(cpConfig.toUserPayload(null)));
+ }
} else {
// something is seriously wrong if this is happening
throw new HiveException(ErrorMsg.GENERIC_ERROR.getErrorCodedMsg());
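The DagUtils changes above wire a cross-product join in two places: the edge gets a CartesianProductEdgeManager and the destination vertex gets a CartesianProductVertexManager, both configured from a CartesianProductConfig that lists the source vertex names. The sketch below shows that configuration pattern in isolation; the vertex names "Map 1" and "Map 2" are invented, and passing a TezConfiguration to toUserPayload is an assumption (the patch itself passes null).

import java.util.Arrays;
import java.util.List;

import org.apache.tez.dag.api.EdgeManagerPluginDescriptor;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.VertexManagerPluginDescriptor;
import org.apache.tez.runtime.library.cartesianproduct.CartesianProductConfig;
import org.apache.tez.runtime.library.cartesianproduct.CartesianProductEdgeManager;
import org.apache.tez.runtime.library.cartesianproduct.CartesianProductVertexManager;

// Hypothetical sketch: configure an unpartitioned cartesian product between two source
// vertices feeding a join vertex, mirroring createEdgeProperty() and createVertex() above.
public class CrossProductWiringSketch {
  public static void main(String[] args) throws Exception {
    List<String> sources = Arrays.asList("Map 1", "Map 2");
    CartesianProductConfig cpConfig = new CartesianProductConfig(sources);
    TezConfiguration tezConf = new TezConfiguration();

    // Edge side: a custom edge manager routes every source task's output to every destination task.
    EdgeManagerPluginDescriptor edgeManager =
        EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName())
            .setUserPayload(cpConfig.toUserPayload(tezConf));

    // Vertex side: the vertex manager derives the destination parallelism from the sources.
    VertexManagerPluginDescriptor vertexManager =
        VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
            .setUserPayload(cpConfig.toUserPayload(tezConf));

    // In the patch these descriptors are attached via
    // UnorderedKVEdgeConfig...createDefaultCustomEdgeProperty(edgeManager) and
    // Vertex.setVertexManagerPlugin(vertexManager).
    System.out.println(edgeManager.getClassName() + " / " + vertexManager.getClassName());
  }
}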
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
index 25c4514..994b4ea 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
@@ -381,7 +381,7 @@ DAG build(JobConf conf, TezWork work, Path scratchDir,
for (BaseWork v: children) {
// finally we can create the grouped edge
GroupInputEdge e = utils.createEdge(group, parentConf,
- workToVertex.get(v), work.getEdgeProperty(w, v), work.getVertexType(v));
+ workToVertex.get(v), work.getEdgeProperty(w, v), work.getVertexType(v), v, work);
dag.addEdge(e);
}
@@ -409,8 +409,8 @@ DAG build(JobConf conf, TezWork work, Path scratchDir,
Edge e = null;
TezEdgeProperty edgeProp = work.getEdgeProperty(w, v);
-
- e = utils.createEdge(wxConf, wx, workToVertex.get(v), edgeProp, work.getVertexType(v));
+ e = utils.createEdge(wxConf, wx, workToVertex.get(v), edgeProp, work.getVertexType(v),
+ v, work);
dag.addEdge(e);
}
}
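Both TezTask call sites now pass the child BaseWork and the enclosing TezWork into createEdge, so DagUtils can check which parents reach the destination over an XPROD_EDGE. That parent scan appears twice in DagUtils (createEdgeProperty and createVertex); a helper along the following lines could factor it out. This is a refactoring sketch only, not part of the patch; the class and method names are hypothetical, while the Hive types and calls are the ones used in the hunks above.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType;
import org.apache.hadoop.hive.ql.plan.TezWork;

// Hypothetical helper: collect the names of all parents that feed 'work' over a cross-product edge.
// Returns an empty list when 'work' is not a cross-product destination.
final class CrossProductSources {
  private CrossProductSources() {
  }

  static List<String> collectCrossProductSources(TezWork tezWork, BaseWork work) {
    List<String> sources = new ArrayList<>();
    for (BaseWork parentWork : tezWork.getParents(work)) {
      if (tezWork.getEdgeType(parentWork, work) == EdgeType.XPROD_EDGE) {
        sources.add(parentWork.getName());
      }
    }
    return sources;
  }
}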
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java
index 9ad33fd..7e003f0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java
@@ -41,6 +41,8 @@
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
import org.apache.hadoop.hive.ql.exec.tez.TezTask;
+import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType;
+import org.apache.hadoop.hive.ql.plan.TezEdgeProperty;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -119,10 +121,10 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs)
}
} else if (currTask instanceof TezTask) {
- TezTask tzTask = (TezTask) currTask;
- TezWork tzWrk = tzTask.getWork();
- checkMapJoins(tzWrk);
- checkTezReducer(tzWrk);
+ TezTask tezTask = (TezTask) currTask;
+ TezWork tezWork = tezTask.getWork();
+ checkMapJoins(tezWork);
+ checkTezReducer(tezWork);
}
return null;
}
@@ -152,8 +154,8 @@ private void checkMapJoins(MapRedTask mrTsk) throws SemanticException {
}
}
- private void checkMapJoins(TezWork tzWrk) throws SemanticException {
- for(BaseWork wrk : tzWrk.getAllWork() ) {
+ private void checkMapJoins(TezWork tezWork) throws SemanticException {
+ for(BaseWork wrk : tezWork.getAllWork() ) {
if ( wrk instanceof MergeJoinWork ) {
wrk = ((MergeJoinWork)wrk).getMainWork();
@@ -168,10 +170,12 @@ private void checkMapJoins(TezWork tzWrk) throws SemanticException {
}
}
- private void checkTezReducer(TezWork tzWrk) throws SemanticException {
- for(BaseWork wrk : tzWrk.getAllWork() ) {
+ private void checkTezReducer(TezWork tezWork) throws SemanticException {
+ for(BaseWork wrk : tezWork.getAllWork() ) {
+ BaseWork origWrk = null;
if ( wrk instanceof MergeJoinWork ) {
+ origWrk = wrk;
wrk = ((MergeJoinWork)wrk).getMainWork();
}
@@ -182,11 +186,21 @@ private void checkTezReducer(TezWork tzWrk) throws SemanticException {
Operator<? extends OperatorDesc> reducer = ((ReduceWork)wrk).getReducer();
if ( reducer instanceof JoinOperator || reducer instanceof CommonMergeJoinOperator ) {
Map rsInfo =
- new HashMap();
+ new HashMap();
for(Map.Entry<Integer, String> e : rWork.getTagToInput().entrySet()) {
- rsInfo.putAll(getReducerInfo(tzWrk, rWork.getName(), e.getValue()));
+ rsInfo.putAll(getReducerInfo(tezWork, rWork.getName(), e.getValue()));
+ }
+ if (checkForCrossProduct(rWork.getName(), reducer, rsInfo)) {
+ //
+ List<BaseWork> parents = tezWork.getParents(origWrk == null ? wrk : origWrk);
+ for (BaseWork p: parents) {
+ TezEdgeProperty prop = tezWork.getEdgeProperty(p, origWrk == null ? wrk : origWrk);
+ LOG.info("Edge Type: "+prop.getEdgeType());
+ if (prop.getEdgeType().equals(EdgeType.SIMPLE_EDGE)) {
+ prop.setEdgeType(EdgeType.XPROD_EDGE);
+ }
+ }
}
- checkForCrossProduct(rWork.getName(), reducer, rsInfo);
}
}
}
@@ -198,17 +212,17 @@ private void checkMRReducer(String taskName, MapredWork mrWrk) throws SemanticEx
}
Operator<? extends OperatorDesc> reducer = rWrk.getReducer();
if ( reducer instanceof JoinOperator|| reducer instanceof CommonMergeJoinOperator ) {
- BaseWork prntWork = mrWrk.getMapWork();
+ BaseWork parentWork = mrWrk.getMapWork();
checkForCrossProduct(taskName, reducer,
- new ExtractReduceSinkInfo(null).analyze(prntWork));
+ new ExtractReduceSinkInfo(null).analyze(parentWork));
}
}
- private void checkForCrossProduct(String taskName,
+ private boolean checkForCrossProduct(String taskName,
Operator<? extends OperatorDesc> reducer,
Map rsInfo) {
if ( rsInfo.isEmpty() ) {
- return;
+ return false;
}
Iterator it = rsInfo.values().iterator();
ExtractReduceSinkInfo.Info info = it.next();
@@ -225,13 +239,15 @@ private void checkForCrossProduct(String taskName,
iAliases,
taskName);
warn(warning);
+ return true;
}
+ return false;
}
- private Map getReducerInfo(TezWork tzWrk, String vertex, String prntVertex)
- throws SemanticException {
- BaseWork prntWork = tzWrk.getWorkMap().get(prntVertex);
- return new ExtractReduceSinkInfo(vertex).analyze(prntWork);
+ private Map getReducerInfo(TezWork tezWork, String vertex, String prntVertex)
+ throws SemanticException {
+ BaseWork parentWork = tezWork.getWorkMap().get(prntVertex);
+ return new ExtractReduceSinkInfo(vertex).analyze(parentWork);
}
/*
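The net effect of the CrossProductCheck change is that detection is no longer just a warning: when checkForCrossProduct() reports a cross product on a reduce-side join, every incoming SIMPLE_EDGE of that vertex is rewritten to XPROD_EDGE, which DagUtils then materializes as the cartesian-product edge shown earlier. Below is a condensed, hypothetical restatement of that rewrite; the class and method names are invented, the Hive types and calls are those used in the diff.

import java.util.List;

import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.TezEdgeProperty;
import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType;
import org.apache.hadoop.hive.ql.plan.TezWork;

// Hypothetical sketch of the edge rewrite performed in checkTezReducer() once a cross
// product has been detected for 'destWork' (the reducer vertex, or its enclosing MergeJoinWork).
final class XProdEdgeRewriteSketch {
  private XProdEdgeRewriteSketch() {
  }

  static void rewriteToCrossProductEdges(TezWork tezWork, BaseWork destWork) {
    List<BaseWork> parents = tezWork.getParents(destWork);
    for (BaseWork parent : parents) {
      TezEdgeProperty prop = tezWork.getEdgeProperty(parent, destWork);
      // Only plain shuffle edges are upgraded; custom and broadcast edges are left untouched.
      if (prop.getEdgeType() == EdgeType.SIMPLE_EDGE) {
        prop.setEdgeType(EdgeType.XPROD_EDGE);
      }
    }
  }
}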
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java.orig ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java.orig
new file mode 100644
index 0000000..699bb11
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java.orig
@@ -0,0 +1,13038 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.Serializable;
+import java.security.AccessControlException;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Deque;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Queue;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.UUID;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import org.antlr.runtime.ClassicToken;
+import org.antlr.runtime.CommonToken;
+import org.antlr.runtime.Token;
+import org.antlr.runtime.tree.Tree;
+import org.antlr.runtime.tree.TreeWizard;
+import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.hive.common.BlobStorageUtils;
+import org.apache.hadoop.hive.ql.plan.AlterTableDesc;
+import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.common.ObjectPair;
+import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.common.StatsSetupConst.StatDB;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.conf.HiveConf.StrictChecks;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.Order;
+import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
+import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.QueryProperties;
+import org.apache.hadoop.hive.ql.QueryState;
+import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.ArchiveUtils;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
+import org.apache.hadoop.hive.ql.exec.FetchTask;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.RecordReader;
+import org.apache.hadoop.hive.ql.exec.RecordWriter;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.AcidUtils.Operation;
+import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
+import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
+import org.apache.hadoop.hive.ql.io.NullRowsInputFormat;
+import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.metadata.DummyPartition;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveUtils;
+import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
+import org.apache.hadoop.hive.ql.optimizer.Optimizer;
+import org.apache.hadoop.hive.ql.optimizer.Transform;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc;
+import org.apache.hadoop.hive.ql.optimizer.lineage.Generator;
+import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec.SpecType;
+import org.apache.hadoop.hive.ql.parse.CalcitePlanner.ASTSearcher;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFInputSpec;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputSpec;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputType;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionedTableFunctionSpec;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec;
+import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType;
+import org.apache.hadoop.hive.ql.parse.SubQueryUtils.ISubQueryJoinInfo;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.CurrentRowSpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.Direction;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.RangeBoundarySpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.ValueBoundarySpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFrameSpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
+import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
+import org.apache.hadoop.hive.ql.plan.CreateTableLikeDesc;
+import org.apache.hadoop.hive.ql.plan.CreateViewDesc;
+import org.apache.hadoop.hive.ql.plan.DDLWork;
+import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
+import org.apache.hadoop.hive.ql.plan.FilterDesc;
+import org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc;
+import org.apache.hadoop.hive.ql.plan.ForwardDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
+import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc;
+import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
+import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
+import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
+import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.PTFDesc;
+import org.apache.hadoop.hive.ql.plan.PlanUtils;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.ScriptDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.ql.plan.UnionDesc;
+import org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef;
+import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef;
+import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
+import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.ql.util.ResourceDownloader;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
+import org.apache.hadoop.hive.serde2.NoOpFetchFormatter;
+import org.apache.hadoop.hive.serde2.NullStructSerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.shims.HadoopShims;
+import org.apache.hadoop.hive.shims.Utils;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.OutputFormat;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import com.google.common.collect.Sets;
+
+/**
+ * Implementation of the semantic analyzer. It generates the query plan.
+ * There are other specific semantic analyzers for some Hive operations, such as
+ * DDLSemanticAnalyzer for DDL operations.
+ */
+
+public class SemanticAnalyzer extends BaseSemanticAnalyzer {
+
+ public static final String DUMMY_DATABASE = "_dummy_database";
+ public static final String DUMMY_TABLE = "_dummy_table";
+ public static final String SUBQUERY_TAG_1 = "-subquery1";
+ public static final String SUBQUERY_TAG_2 = "-subquery2";
+
+ // Max characters when auto generating the column name with func name
+ private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20;
+
+ private static final String VALUES_TMP_TABLE_NAME_PREFIX = "Values__Tmp__Table__";
+
+ static final String MATERIALIZATION_MARKER = "$MATERIALIZATION";
+
+ private HashMap opToPartPruner;
+ private HashMap opToPartList;
+ protected HashMap topOps;
+ protected LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx;
+ private List loadTableWork;
+ private List loadFileWork;
+ private List columnStatsAutoGatherContexts;
+ private final Map joinContext;
+ private final Map smbMapJoinContext;
+ private final HashMap topToTable;
+ private final Map fsopToTable;
+ private final List reduceSinkOperatorsAddedByEnforceBucketingSorting;
+ private final HashMap<TableScanOperator, Map<String, String>> topToTableProps;
+ private QB qb;
+ private ASTNode ast;
+ private int destTableId;
+ private UnionProcContext uCtx;
+ List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer;
+ private HashMap opToSamplePruner;
+ private final Map<TableScanOperator, Map<String, ExprNodeDesc>> opToPartToSkewedPruner;
+ private Map viewProjectToTableSchema;
+ /**
+ * a map for the split sampling, from alias to an instance of SplitSample
+ * that describes percentage and number.
+ */
+ private final HashMap nameToSplitSample;
+ Map<GroupByOperator, Set<String>> groupOpToInputTables;
+ Map prunedPartitions;
+ protected List resultSchema;
+ private CreateViewDesc createVwDesc;
+ private ArrayList viewsExpanded;
+ private ASTNode viewSelect;
+ protected final UnparseTranslator unparseTranslator;
+ private final GlobalLimitCtx globalLimitCtx;
+
+ // prefix for column names auto generated by hive
+ private final String autogenColAliasPrfxLbl;
+ private final boolean autogenColAliasPrfxIncludeFuncName;
+
+ // Keep track of view alias to read entity corresponding to the view
+ // For example, for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T,
+ // this keeps track of the aliases V3, V3:V2, V3:V2:V1.
+ // It is used when T is added as an input for the query; the parents of T are
+ // derived from the alias V3:V2:V1:T
+ private final Map viewAliasToInput;
+
+ // need to merge the isDirect flag into the input even if the newInput does not have a parent
+ private boolean mergeIsDirect;
+
+ // flag for no scan during analyze ... compute statistics
+ protected boolean noscan;
+
+ //flag for partial scan during analyze ... compute statistics
+ protected boolean partialscan;
+
+ protected volatile boolean disableJoinMerge = false;
+
+ /*
+ * Capture the CTE definitions in a Query.
+ */
+ final Map aliasToCTEs;
+
+ /*
+ * Used to check recursive CTE invocations. Similar to viewsExpanded
+ */
+ ArrayList ctesExpanded;
+
+ /*
+ * Whether root tasks after materialized CTE linkage have been resolved
+ */
+ boolean rootTasksResolved;
+
+ protected TableMask tableMask;
+
+ CreateTableDesc tableDesc;
+
+ /** Not thread-safe. */
+ final ASTSearcher astSearcher = new ASTSearcher();
+
+ protected AnalyzeRewriteContext analyzeRewrite;
+
+ // A mapping from a tableName to a table object in metastore.
+ Map tabNameToTabObject;
+
+ // The tokens we should ignore when we are trying to do table masking.
+ private final Set ignoredTokens = Sets.newHashSet(HiveParser.TOK_GROUPBY,
+ HiveParser.TOK_ORDERBY, HiveParser.TOK_WINDOWSPEC, HiveParser.TOK_CLUSTERBY,
+ HiveParser.TOK_DISTRIBUTEBY, HiveParser.TOK_SORTBY);
+
+ static class Phase1Ctx {
+ String dest;
+ int nextNum;
+ }
+
+ public SemanticAnalyzer(QueryState queryState) throws SemanticException {
+ super(queryState);
+ opToPartPruner = new HashMap();
+ opToPartList = new HashMap();
+ opToSamplePruner = new HashMap();
+ nameToSplitSample = new HashMap();
+ // Must be deterministic order maps - see HIVE-8707
+ topOps = new LinkedHashMap();
+ loadTableWork = new ArrayList();
+ loadFileWork = new ArrayList();
+ columnStatsAutoGatherContexts = new ArrayList();
+ opParseCtx = new LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext>();
+ joinContext = new HashMap();
+ smbMapJoinContext = new HashMap();
+ // Must be deterministic order map for consistent q-test output across Java versions
+ topToTable = new LinkedHashMap();
+ fsopToTable = new HashMap();
+ reduceSinkOperatorsAddedByEnforceBucketingSorting = new ArrayList();
+ topToTableProps = new HashMap<TableScanOperator, Map<String, String>>();
+ destTableId = 1;
+ uCtx = null;
+ listMapJoinOpsNoReducer = new ArrayList<AbstractMapJoinOperator<? extends MapJoinDesc>>();
+ groupOpToInputTables = new HashMap<GroupByOperator, Set<String>>();
+ prunedPartitions = new HashMap();
+ tabNameToTabObject = new HashMap();
+ unparseTranslator = new UnparseTranslator(conf);
+ autogenColAliasPrfxLbl = HiveConf.getVar(conf,
+ HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL);
+ autogenColAliasPrfxIncludeFuncName = HiveConf.getBoolVar(conf,
+ HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME);
+ queryProperties = new QueryProperties();
+ opToPartToSkewedPruner = new HashMap<TableScanOperator, Map<String, ExprNodeDesc>>();
+ aliasToCTEs = new HashMap();
+ globalLimitCtx = new GlobalLimitCtx();
+ viewAliasToInput = new HashMap();
+ mergeIsDirect = true;
+ noscan = partialscan = false;
+ tabNameToTabObject = new HashMap<>();
+ }
+
+ @Override
+ protected void reset(boolean clearPartsCache) {
+ super.reset(true);
+ if(clearPartsCache) {
+ prunedPartitions.clear();
+
+ // When init(true) is combined with genResolvedParseTree, it will generate a resolved parse tree from the syntax tree.
+ // ReadEntities created under these conditions should all be relevant to the syntax tree, even the ones without parents,
+ // so set mergeIsDirect to true here.
+ mergeIsDirect = true;
+ } else {
+ mergeIsDirect = false;
+ }
+ tabNameToTabObject.clear();
+ loadTableWork.clear();
+ loadFileWork.clear();
+ columnStatsAutoGatherContexts.clear();
+ topOps.clear();
+ destTableId = 1;
+ idToTableNameMap.clear();
+ qb = null;
+ ast = null;
+ uCtx = null;
+ joinContext.clear();
+ smbMapJoinContext.clear();
+ opParseCtx.clear();
+ groupOpToInputTables.clear();
+ disableJoinMerge = false;
+ aliasToCTEs.clear();
+ topToTable.clear();
+ opToPartPruner.clear();
+ opToPartList.clear();
+ opToPartToSkewedPruner.clear();
+ opToSamplePruner.clear();
+ nameToSplitSample.clear();
+ fsopToTable.clear();
+ resultSchema = null;
+ createVwDesc = null;
+ viewsExpanded = null;
+ viewSelect = null;
+ ctesExpanded = null;
+ globalLimitCtx.disableOpt();
+ viewAliasToInput.clear();
+ reduceSinkOperatorsAddedByEnforceBucketingSorting.clear();
+ topToTableProps.clear();
+ listMapJoinOpsNoReducer.clear();
+ unparseTranslator.clear();
+ queryProperties.clear();
+ outputs.clear();
+ }
+
+ public void initParseCtx(ParseContext pctx) {
+ opToPartPruner = pctx.getOpToPartPruner();
+ opToPartList = pctx.getOpToPartList();
+ opToSamplePruner = pctx.getOpToSamplePruner();
+ topOps = pctx.getTopOps();
+ loadTableWork = pctx.getLoadTableWork();
+ loadFileWork = pctx.getLoadFileWork();
+ ctx = pctx.getContext();
+ destTableId = pctx.getDestTableId();
+ idToTableNameMap = pctx.getIdToTableNameMap();
+ uCtx = pctx.getUCtx();
+ listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer();
+ prunedPartitions = pctx.getPrunedPartitions();
+ tabNameToTabObject = pctx.getTabNameToTabObject();
+ fetchTask = pctx.getFetchTask();
+ setLineageInfo(pctx.getLineageInfo());
+ }
+
+ public ParseContext getParseContext() {
+ // Make sure the basic query properties are initialized
+ copyInfoToQueryProperties(queryProperties);
+ return new ParseContext(queryState, opToPartPruner, opToPartList, topOps,
+ new HashSet(joinContext.keySet()),
+ new HashSet(smbMapJoinContext.keySet()),
+ loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx,
+ listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject,
+ opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
+ opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting,
+ analyzeRewrite, tableDesc, queryProperties, viewProjectToTableSchema, acidFileSinks);
+ }
+
+ public CompilationOpContext getOpContext() {
+ return ctx.getOpContext();
+ }
+
+ public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias)
+ throws SemanticException {
+ doPhase1QBExpr(ast, qbexpr, id, alias, false);
+ }
+ @SuppressWarnings("nls")
+ public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias, boolean insideView)
+ throws SemanticException {
+
+ assert (ast.getToken() != null);
+ switch (ast.getToken().getType()) {
+ case HiveParser.TOK_QUERY: {
+ QB qb = new QB(id, alias, true);
+ qb.setInsideView(insideView);
+ Phase1Ctx ctx_1 = initPhase1Ctx();
+ doPhase1(ast, qb, ctx_1, null);
+
+ qbexpr.setOpcode(QBExpr.Opcode.NULLOP);
+ qbexpr.setQB(qb);
+ }
+ break;
+ case HiveParser.TOK_UNIONALL: {
+ qbexpr.setOpcode(QBExpr.Opcode.UNION);
+ // query 1
+ assert (ast.getChild(0) != null);
+ QBExpr qbexpr1 = new QBExpr(alias + SUBQUERY_TAG_1);
+ doPhase1QBExpr((ASTNode) ast.getChild(0), qbexpr1, id + SUBQUERY_TAG_1,
+ alias + SUBQUERY_TAG_1, insideView);
+ qbexpr.setQBExpr1(qbexpr1);
+
+ // query 2
+ assert (ast.getChild(1) != null);
+ QBExpr qbexpr2 = new QBExpr(alias + SUBQUERY_TAG_2);
+ doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id + SUBQUERY_TAG_2,
+ alias + SUBQUERY_TAG_2, insideView);
+ qbexpr.setQBExpr2(qbexpr2);
+ }
+ break;
+ }
+ }
+
+ private LinkedHashMap doPhase1GetAggregationsFromSelect(
+ ASTNode selExpr, QB qb, String dest) throws SemanticException {
+
+ // Iterate over the selects search for aggregation Trees.
+ // Use String as keys to eliminate duplicate trees.
+ LinkedHashMap aggregationTrees = new LinkedHashMap();
+ List wdwFns = new ArrayList();
+ for (int i = 0; i < selExpr.getChildCount(); ++i) {
+ ASTNode function = (ASTNode) selExpr.getChild(i);
+ if (function.getType() == HiveParser.TOK_SELEXPR ||
+ function.getType() == HiveParser.TOK_SUBQUERY_EXPR) {
+ function = (ASTNode)function.getChild(0);
+ }
+ doPhase1GetAllAggregations(function, aggregationTrees, wdwFns);
+ }
+
+ // window based aggregations are handled differently
+ for (ASTNode wdwFn : wdwFns) {
+ WindowingSpec spec = qb.getWindowingSpec(dest);
+ if(spec == null) {
+ queryProperties.setHasWindowing(true);
+ spec = new WindowingSpec();
+ qb.addDestToWindowingSpec(dest, spec);
+ }
+ HashMap wExprsInDest = qb.getParseInfo().getWindowingExprsForClause(dest);
+ int wColIdx = spec.getWindowExpressions() == null ? 0 : spec.getWindowExpressions().size();
+ WindowFunctionSpec wFnSpec = processWindowFunction(wdwFn,
+ (ASTNode)wdwFn.getChild(wdwFn.getChildCount()-1));
+ // If this is a duplicate invocation of a function, don't add it to the WindowingSpec.
+ if ( wExprsInDest != null &&
+ wExprsInDest.containsKey(wFnSpec.getExpression().toStringTree())) {
+ continue;
+ }
+ wFnSpec.setAlias(wFnSpec.getName() + "_window_" + wColIdx);
+ spec.addWindowFunction(wFnSpec);
+ qb.getParseInfo().addWindowingExprToClause(dest, wFnSpec.getExpression());
+ }
+
+ return aggregationTrees;
+ }
+
+ private void doPhase1GetColumnAliasesFromSelect(
+ ASTNode selectExpr, QBParseInfo qbp) {
+ for (int i = 0; i < selectExpr.getChildCount(); ++i) {
+ ASTNode selExpr = (ASTNode) selectExpr.getChild(i);
+ if ((selExpr.getToken().getType() == HiveParser.TOK_SELEXPR)
+ && (selExpr.getChildCount() == 2)) {
+ String columnAlias = unescapeIdentifier(selExpr.getChild(1).getText());
+ qbp.setExprToColumnAlias((ASTNode) selExpr.getChild(0), columnAlias);
+ }
+ }
+ }
+
+ /**
+ * DFS-scan the expressionTree to find all aggregation subtrees and put them
+ * in aggregations.
+ *
+ * @param expressionTree
+ * @param aggregations
+ * the key to the HashTable is the toStringTree() representation of
+ * the aggregation subtree.
+ * @throws SemanticException
+ */
+ private void doPhase1GetAllAggregations(ASTNode expressionTree,
+ HashMap aggregations, List wdwFns) throws SemanticException {
+ int exprTokenType = expressionTree.getToken().getType();
+ if (exprTokenType == HiveParser.TOK_FUNCTION
+ || exprTokenType == HiveParser.TOK_FUNCTIONDI
+ || exprTokenType == HiveParser.TOK_FUNCTIONSTAR) {
+ assert (expressionTree.getChildCount() != 0);
+ if (expressionTree.getChild(expressionTree.getChildCount()-1).getType()
+ == HiveParser.TOK_WINDOWSPEC) {
+ // If it is a windowing spec, we include it in the list
+ // Further, we will examine its children AST nodes to check whether
+ // there are aggregation functions within
+ wdwFns.add(expressionTree);
+ doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(expressionTree.getChildCount()-1),
+ aggregations, wdwFns);
+ return;
+ }
+ if (expressionTree.getChild(0).getType() == HiveParser.Identifier) {
+ String functionName = unescapeIdentifier(expressionTree.getChild(0)
+ .getText());
+ // Validate the function name
+ if (FunctionRegistry.getFunctionInfo(functionName) == null) {
+ throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(functionName));
+ }
+ if(FunctionRegistry.impliesOrder(functionName)) {
+ throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName));
+ }
+ if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) {
+ if(containsLeadLagUDF(expressionTree)) {
+ throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName));
+ }
+ aggregations.put(expressionTree.toStringTree(), expressionTree);
+ FunctionInfo fi = FunctionRegistry.getFunctionInfo(functionName);
+ if (!fi.isNative()) {
+ unparseTranslator.addIdentifierTranslation((ASTNode) expressionTree
+ .getChild(0));
+ }
+ return;
+ }
+ }
+ }
+ for (int i = 0; i < expressionTree.getChildCount(); i++) {
+ doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(i),
+ aggregations, wdwFns);
+ }
+ }
+
+ private List doPhase1GetDistinctFuncExprs(
+ HashMap aggregationTrees) throws SemanticException {
+ List exprs = new ArrayList();
+ for (Map.Entry entry : aggregationTrees.entrySet()) {
+ ASTNode value = entry.getValue();
+ assert (value != null);
+ if (value.getToken().getType() == HiveParser.TOK_FUNCTIONDI) {
+ exprs.add(value);
+ }
+ }
+ return exprs;
+ }
+
+ public static String generateErrorMessage(ASTNode ast, String message) {
+ StringBuilder sb = new StringBuilder();
+ if (ast == null) {
+ sb.append(message).append(". Cannot tell the position of null AST.");
+ return sb.toString();
+ }
+ sb.append(ast.getLine());
+ sb.append(":");
+ sb.append(ast.getCharPositionInLine());
+ sb.append(" ");
+ sb.append(message);
+ sb.append(". Error encountered near token '");
+ sb.append(ErrorMsg.getText(ast));
+ sb.append("'");
+ return sb.toString();
+ }
+
+ ASTNode getAST() {
+ return this.ast;
+ }
+
+ protected void setAST(ASTNode newAST) {
+ this.ast = newAST;
+ }
+
+ /**
+ * Goes through the tabref tree and finds the alias for the table. Once found,
+ * it records the table name -> alias association in aliasToTabs. It also makes
+ * an association from the alias to the table AST in parse info.
+ *
+ * @return the alias of the table
+ */
+ private String processTable(QB qb, ASTNode tabref) throws SemanticException {
+ // For each table reference get the table name
+ // and the alias (if alias is not present, the table name
+ // is used as an alias)
+ int aliasIndex = 0;
+ int propsIndex = -1;
+ int tsampleIndex = -1;
+ int ssampleIndex = -1;
+ for (int index = 1; index < tabref.getChildCount(); index++) {
+ ASTNode ct = (ASTNode) tabref.getChild(index);
+ if (ct.getToken().getType() == HiveParser.TOK_TABLEBUCKETSAMPLE) {
+ tsampleIndex = index;
+ } else if (ct.getToken().getType() == HiveParser.TOK_TABLESPLITSAMPLE) {
+ ssampleIndex = index;
+ } else if (ct.getToken().getType() == HiveParser.TOK_TABLEPROPERTIES) {
+ propsIndex = index;
+ } else {
+ aliasIndex = index;
+ }
+ }
+
+ ASTNode tableTree = (ASTNode) (tabref.getChild(0));
+
+ String tabIdName = getUnescapedName(tableTree).toLowerCase();
+
+ String alias;
+ if (aliasIndex != 0) {
+ alias = unescapeIdentifier(tabref.getChild(aliasIndex).getText());
+ }
+ else {
+ alias = getUnescapedUnqualifiedTableName(tableTree);
+ }
+
+ if (propsIndex >= 0) {
+ Tree propsAST = tabref.getChild(propsIndex);
+ Map props = DDLSemanticAnalyzer.getProps((ASTNode) propsAST.getChild(0));
+ // We get the information from Calcite.
+ if ("TRUE".equals(props.get("insideView"))) {
+ qb.getAliasInsideView().add(alias.toLowerCase());
+ }
+ qb.setTabProps(alias, props);
+ }
+
+ // If the alias is already there then we have a conflict
+ if (qb.exists(alias)) {
+ throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(tabref
+ .getChild(aliasIndex)));
+ }
+ if (tsampleIndex >= 0) {
+ ASTNode sampleClause = (ASTNode) tabref.getChild(tsampleIndex);
+ ArrayList sampleCols = new ArrayList();
+ if (sampleClause.getChildCount() > 2) {
+ for (int i = 2; i < sampleClause.getChildCount(); i++) {
+ sampleCols.add((ASTNode) sampleClause.getChild(i));
+ }
+ }
+ // TODO: For now only support sampling on up to two columns
+ // Need to change it to list of columns
+ if (sampleCols.size() > 2) {
+ throw new SemanticException(generateErrorMessage(
+ (ASTNode) tabref.getChild(0),
+ ErrorMsg.SAMPLE_RESTRICTION.getMsg()));
+ }
+ TableSample tabSample = new TableSample(
+ unescapeIdentifier(sampleClause.getChild(0).getText()),
+ unescapeIdentifier(sampleClause.getChild(1).getText()),
+ sampleCols);
+ qb.getParseInfo().setTabSample(alias, tabSample);
+ if (unparseTranslator.isEnabled()) {
+ for (ASTNode sampleCol : sampleCols) {
+ unparseTranslator.addIdentifierTranslation((ASTNode) sampleCol
+ .getChild(0));
+ }
+ }
+ } else if (ssampleIndex >= 0) {
+ ASTNode sampleClause = (ASTNode) tabref.getChild(ssampleIndex);
+
+ Tree type = sampleClause.getChild(0);
+ Tree numerator = sampleClause.getChild(1);
+ String value = unescapeIdentifier(numerator.getText());
+
+
+ SplitSample sample;
+ if (type.getType() == HiveParser.TOK_PERCENT) {
+ assertCombineInputFormat(numerator, "Percentage");
+ Double percent = Double.valueOf(value).doubleValue();
+ if (percent < 0 || percent > 100) {
+ throw new SemanticException(generateErrorMessage((ASTNode) numerator,
+ "Sampling percentage should be between 0 and 100"));
+ }
+ int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM);
+ sample = new SplitSample(percent, seedNum);
+ } else if (type.getType() == HiveParser.TOK_ROWCOUNT) {
+ sample = new SplitSample(Integer.parseInt(value));
+ } else {
+ assert type.getType() == HiveParser.TOK_LENGTH;
+ assertCombineInputFormat(numerator, "Total Length");
+ long length = Integer.parseInt(value.substring(0, value.length() - 1));
+ char last = value.charAt(value.length() - 1);
+ if (last == 'k' || last == 'K') {
+ length <<= 10;
+ } else if (last == 'm' || last == 'M') {
+ length <<= 20;
+ } else if (last == 'g' || last == 'G') {
+ length <<= 30;
+ }
+ int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM);
+ sample = new SplitSample(length, seedNum);
+ }
+ String alias_id = getAliasId(alias, qb);
+ nameToSplitSample.put(alias_id, sample);
+ }
+ // Insert this map into the stats
+ qb.setTabAlias(alias, tabIdName);
+ if (qb.isInsideView()) {
+ qb.getAliasInsideView().add(alias.toLowerCase());
+ }
+ qb.addAlias(alias);
+
+ qb.getParseInfo().setSrcForAlias(alias, tableTree);
+
+ // if alias to CTE contains the alias, we do not do the translation because
+ // cte is actually a subquery.
+ if (!this.aliasToCTEs.containsKey(alias)) {
+ unparseTranslator.addTableNameTranslation(tableTree, SessionState.get().getCurrentDatabase());
+ if (aliasIndex != 0) {
+ unparseTranslator.addIdentifierTranslation((ASTNode) tabref.getChild(aliasIndex));
+ }
+ }
+
+ return alias;
+ }
+
+ Map getNameToSplitSampleMap() {
+ return this.nameToSplitSample;
+ }
+
+ /**
+ * Convert a string to Text format and write its bytes in the same way TextOutputFormat would do.
+ * This is needed to properly encode non-ascii characters.
+ */
+ private static void writeAsText(String text, FSDataOutputStream out) throws IOException {
+ Text to = new Text(text);
+ out.write(to.getBytes(), 0, to.getLength());
+ }
+
+ /**
+ * Generate a temp table out of a value clause
+ * See also {@link #preProcessForInsert(ASTNode, QB)}
+ */
+ private ASTNode genValuesTempTable(ASTNode originalFrom, QB qb) throws SemanticException {
+ Path dataDir = null;
+ if(!qb.getEncryptedTargetTablePaths().isEmpty()) {
+ //currently only Insert into T values(...) is supported thus only 1 values clause
+ //and only 1 target table are possible. If/when support for
+ //select ... from values(...) is added an insert statement may have multiple
+ //encrypted target tables.
+ dataDir = ctx.getMRTmpPath(qb.getEncryptedTargetTablePaths().get(0).toUri());
+ }
+ // Pick a name for the table
+ SessionState ss = SessionState.get();
+ String tableName = VALUES_TMP_TABLE_NAME_PREFIX + ss.getNextValuesTempTableSuffix();
+
+ // Step 1, parse the values clause we were handed
+ List<? extends Node> fromChildren = originalFrom.getChildren();
+ // First child should be the virtual table ref
+ ASTNode virtualTableRef = (ASTNode)fromChildren.get(0);
+ assert virtualTableRef.getToken().getType() == HiveParser.TOK_VIRTUAL_TABREF :
+ "Expected first child of TOK_VIRTUAL_TABLE to be TOK_VIRTUAL_TABREF but was " +
+ virtualTableRef.getName();
+
+ List<? extends Node> virtualTableRefChildren = virtualTableRef.getChildren();
+ // First child of this should be the table name. If it's anonymous,
+ // then we don't have a table name.
+ ASTNode tabName = (ASTNode)virtualTableRefChildren.get(0);
+ if (tabName.getToken().getType() != HiveParser.TOK_ANONYMOUS) {
+ // TODO, if you want to make select ... from (values(...) as foo(...) work,
+ // you need to parse this list of column names and build it into the table
+ throw new SemanticException(ErrorMsg.VALUES_TABLE_CONSTRUCTOR_NOT_SUPPORTED.getMsg());
+ }
+
+ // The second child of the TOK_VIRTUAL_TABLE should be TOK_VALUES_TABLE
+ ASTNode valuesTable = (ASTNode)fromChildren.get(1);
+ assert valuesTable.getToken().getType() == HiveParser.TOK_VALUES_TABLE :
+ "Expected second child of TOK_VIRTUAL_TABLE to be TOK_VALUE_TABLE but was " +
+ valuesTable.getName();
+ // Each of the children of TOK_VALUES_TABLE will be a TOK_VALUE_ROW
+ List<? extends Node> valuesTableChildren = valuesTable.getChildren();
+
+ // Now that we're going to start reading through the rows, open a file to write the rows to.
+ // If we leave this method before creating the temporary table, we need to be sure to clean up
+ // this file.
+ Path tablePath = null;
+ FileSystem fs = null;
+ FSDataOutputStream out = null;
+ try {
+ if(dataDir == null) {
+ tablePath = Warehouse.getDnsPath(new Path(ss.getTempTableSpace(), tableName), conf);
+ }
+ else {
+ //if target table of insert is encrypted, make sure temporary table data is stored
+ //similarly encrypted
+ tablePath = Warehouse.getDnsPath(new Path(dataDir, tableName), conf);
+ }
+ fs = tablePath.getFileSystem(conf);
+ fs.mkdirs(tablePath);
+ Path dataFile = new Path(tablePath, "data_file");
+ out = fs.create(dataFile);
+ List fields = new ArrayList();
+
+ boolean firstRow = true;
+ for (Node n : valuesTableChildren) {
+ ASTNode valuesRow = (ASTNode) n;
+ assert valuesRow.getToken().getType() == HiveParser.TOK_VALUE_ROW :
+ "Expected child of TOK_VALUE_TABLE to be TOK_VALUE_ROW but was " + valuesRow.getName();
+ // Each of the children of this should be a literal
+ List<? extends Node> valuesRowChildren = valuesRow.getChildren();
+ boolean isFirst = true;
+ int nextColNum = 1;
+ for (Node n1 : valuesRowChildren) {
+ ASTNode value = (ASTNode) n1;
+ if (firstRow) {
+ fields.add(new FieldSchema("tmp_values_col" + nextColNum++, "string", ""));
+ }
+ if (isFirst) isFirst = false;
+ else writeAsText("\u0001", out);
+ writeAsText(unparseExprForValuesClause(value), out);
+ }
+ writeAsText("\n", out);
+ firstRow = false;
+ }
+
+ // Step 2, create a temp table, using the created file as the data
+ StorageFormat format = new StorageFormat(conf);
+ format.processStorageFormat("TextFile");
+ Table table = db.newTable(tableName);
+ table.setSerializationLib(format.getSerde());
+ table.setFields(fields);
+ table.setDataLocation(tablePath);
+ table.getTTable().setTemporary(true);
+ table.setStoredAsSubDirectories(false);
+ table.setInputFormatClass(format.getInputFormat());
+ table.setOutputFormatClass(format.getOutputFormat());
+ db.createTable(table, false);
+ } catch (Exception e) {
+ String errMsg = ErrorMsg.INSERT_CANNOT_CREATE_TEMP_FILE.getMsg() + e.getMessage();
+ LOG.error(errMsg);
+ // Try to delete the file
+ if (fs != null && tablePath != null) {
+ try {
+ fs.delete(tablePath, false);
+ } catch (IOException swallowIt) {}
+ }
+ throw new SemanticException(errMsg, e);
+ } finally {
+ IOUtils.closeStream(out);
+ }
+
+ // Step 3, return a new subtree with a from clause built around that temp table
+ // The form of the tree is TOK_TABREF->TOK_TABNAME->identifier(tablename)
+ Token t = new ClassicToken(HiveParser.TOK_TABREF);
+ ASTNode tabRef = new ASTNode(t);
+ t = new ClassicToken(HiveParser.TOK_TABNAME);
+ ASTNode tabNameNode = new ASTNode(t);
+ tabRef.addChild(tabNameNode);
+ t = new ClassicToken(HiveParser.Identifier, tableName);
+ ASTNode identifier = new ASTNode(t);
+ tabNameNode.addChild(identifier);
+ return tabRef;
+ }
+
+ // Take an expression in the values clause and turn it back into a string. This is far from
+ // comprehensive. At the moment it only supports:
+ // * literals (all types)
+ // * unary negatives
+ // * true/false
+ private String unparseExprForValuesClause(ASTNode expr) throws SemanticException {
+ switch (expr.getToken().getType()) {
+ case HiveParser.Number:
+ return expr.getText();
+
+ case HiveParser.StringLiteral:
+ return BaseSemanticAnalyzer.unescapeSQLString(expr.getText());
+
+ case HiveParser.KW_FALSE:
+ // UDFToBoolean casts any non-empty string to true, so set this to false
+ return "";
+
+ case HiveParser.KW_TRUE:
+ return "TRUE";
+
+ case HiveParser.MINUS:
+ return "-" + unparseExprForValuesClause((ASTNode)expr.getChildren().get(0));
+
+ case HiveParser.TOK_NULL:
+ // Hive's text input will translate this as a null
+ return "\\N";
+
+ default:
+ throw new SemanticException("Expression of type " + expr.getText() +
+ " not supported in insert/values");
+ }
+
+ }
+
+ private void assertCombineInputFormat(Tree numerator, String message) throws SemanticException {
+ String inputFormat = conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") ?
+ HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT):
+ HiveConf.getVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT);
+ if (!inputFormat.equals(CombineHiveInputFormat.class.getName())) {
+ throw new SemanticException(generateErrorMessage((ASTNode) numerator,
+ message + " sampling is not supported in " + inputFormat));
+ }
+ }
+
+ private String processSubQuery(QB qb, ASTNode subq) throws SemanticException {
+
+ // This is a subquery and must have an alias
+ if (subq.getChildCount() != 2) {
+ throw new SemanticException(ErrorMsg.NO_SUBQUERY_ALIAS.getMsg(subq));
+ }
+ ASTNode subqref = (ASTNode) subq.getChild(0);
+ String alias = unescapeIdentifier(subq.getChild(1).getText());
+
+ // Recursively do the first phase of semantic analysis for the subquery
+ QBExpr qbexpr = new QBExpr(alias);
+
+ doPhase1QBExpr(subqref, qbexpr, qb.getId(), alias);
+
+ // If the alias is already there then we have a conflict
+ if (qb.exists(alias)) {
+ throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(subq
+ .getChild(1)));
+ }
+ // Insert this map into the stats
+ qb.setSubqAlias(alias, qbexpr);
+ qb.addAlias(alias);
+
+ unparseTranslator.addIdentifierTranslation((ASTNode) subq.getChild(1));
+
+ return alias;
+ }
+
+ /*
+ * Phase1: hold onto any CTE definitions in aliasToCTE.
+ * CTE definitions are global to the Query.
+ */
+ private void processCTE(QB qb, ASTNode ctes) throws SemanticException {
+
+ int numCTEs = ctes.getChildCount();
+
+ for (int i = 0; i < numCTEs; i++) {
+ ASTNode cte = (ASTNode) ctes.getChild(i);
+ ASTNode cteQry = (ASTNode) cte.getChild(0);
+ String alias = unescapeIdentifier(cte.getChild(1).getText());
+
+ String qName = qb.getId() == null ? "" : qb.getId() + ":";
+ qName += alias.toLowerCase();
+
+ if (aliasToCTEs.containsKey(qName)) {
+ throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(cte.getChild(1)));
+ }
+ aliasToCTEs.put(qName, new CTEClause(qName, cteQry));
+ }
+ }
+
+ /*
+ * Look up a CTE by name: starting from the current QB's id, strip one ":"
+ * qualifier at a time, finally falling back to the plain cteName key in the
+ * aliasToCTEs map.
+ *
+ */
+ private CTEClause findCTEFromName(QB qb, String cteName) {
+ StringBuilder qId = new StringBuilder();
+ if (qb.getId() != null) {
+ qId.append(qb.getId());
+ }
+
+ while (qId.length() > 0) {
+ String nm = qId + ":" + cteName;
+ CTEClause cte = aliasToCTEs.get(nm);
+ if (cte != null) {
+ return cte;
+ }
+ int lastIndex = qId.lastIndexOf(":");
+ lastIndex = lastIndex < 0 ? 0 : lastIndex;
+ qId.setLength(lastIndex);
+ }
+ return aliasToCTEs.get(cteName);
+ }
+
+ /*
+ * If a CTE is referenced in a QueryBlock:
+ * - add it as a SubQuery for now.
+ * - SQ.alias is the alias used in QB. (if no alias is specified,
+ * it uses the CTE name; works just like table references)
+ * - Adding the SQ is done by:
+ * - copying AST of CTE
+ * - setting ASTOrigin on cloned AST.
+ * - trigger phase 1 on new QBExpr.
+ * - update QB data structs: remove this as a table reference, move it to a SQ invocation.
+ */
+ private void addCTEAsSubQuery(QB qb, String cteName, String cteAlias)
+ throws SemanticException {
+ cteAlias = cteAlias == null ? cteName : cteAlias;
+ CTEClause cte = findCTEFromName(qb, cteName);
+ ASTNode cteQryNode = cte.cteNode;
+ QBExpr cteQBExpr = new QBExpr(cteAlias);
+ doPhase1QBExpr(cteQryNode, cteQBExpr, qb.getId(), cteAlias);
+ qb.rewriteCTEToSubq(cteAlias, cteName, cteQBExpr);
+ }
+
+ private final CTEClause rootClause = new CTEClause(null, null);
+
+ @Override
+ public List<Task<? extends Serializable>> getAllRootTasks() {
+ if (!rootTasksResolved) {
+ rootTasks = toRealRootTasks(rootClause.asExecutionOrder());
+ rootTasksResolved = true;
+ }
+ return rootTasks;
+ }
+
+ @Override
+ public HashSet getAllInputs() {
+ HashSet readEntities = new HashSet(getInputs());
+ for (CTEClause cte : rootClause.asExecutionOrder()) {
+ if (cte.source != null) {
+ readEntities.addAll(cte.source.getInputs());
+ }
+ }
+ return readEntities;
+ }
+
+ @Override
+ public HashSet getAllOutputs() {
+ HashSet writeEntities = new HashSet(getOutputs());
+ for (CTEClause cte : rootClause.asExecutionOrder()) {
+ if (cte.source != null) {
+ writeEntities.addAll(cte.source.getOutputs());
+ }
+ }
+ return writeEntities;
+ }
+
+ class CTEClause {
+ CTEClause(String alias, ASTNode cteNode) {
+ this.alias = alias;
+ this.cteNode = cteNode;
+ }
+ String alias;
+ ASTNode cteNode;
+ boolean materialize;
+ int reference;
+ QBExpr qbExpr;
+ List parents = new ArrayList();
+
+ // materialized
+ Table table;
+ SemanticAnalyzer source;
+
+ List<Task<? extends Serializable>> getTasks() {
+ return source == null ? null : source.rootTasks;
+ }
+
+ List asExecutionOrder() {
+ List execution = new ArrayList();
+ asExecutionOrder(new HashSet(), execution);
+ return execution;
+ }
+
+ void asExecutionOrder(Set visited, List execution) {
+ for (CTEClause parent : parents) {
+ if (visited.add(parent)) {
+ parent.asExecutionOrder(visited, execution);
+ }
+ }
+ execution.add(this);
+ }
+
+ @Override
+ public String toString() {
+ return alias == null ? "" : alias;
+ }
+ }
+
+ private List<Task<? extends Serializable>> toRealRootTasks(List<CTEClause> execution) {
+ List<Task<? extends Serializable>> cteRoots = new ArrayList<>();
+ List<Task<? extends Serializable>> cteLeafs = new ArrayList<>();
+ List<Task<? extends Serializable>> curTopRoots = null;
+ List<Task<? extends Serializable>> curBottomLeafs = null;
+ for (int i = 0; i < execution.size(); i++) {
+ CTEClause current = execution.get(i);
+ if (current.parents.isEmpty() && curTopRoots != null) {
+ cteRoots.addAll(curTopRoots);
+ cteLeafs.addAll(curBottomLeafs);
+ curTopRoots = curBottomLeafs = null;
+ }
+ List<Task<? extends Serializable>> curTasks = current.getTasks();
+ if (curTasks == null) {
+ continue;
+ }
+ if (curTopRoots == null) {
+ curTopRoots = curTasks;
+ }
+ if (curBottomLeafs != null) {
+ for (Task<?> topLeafTask : curBottomLeafs) {
+ for (Task<?> currentRootTask : curTasks) {
+ topLeafTask.addDependentTask(currentRootTask);
+ }
+ }
+ }
+ curBottomLeafs = Task.findLeafs(curTasks);
+ }
+ if (curTopRoots != null) {
+ cteRoots.addAll(curTopRoots);
+ cteLeafs.addAll(curBottomLeafs);
+ }
+
+ if (cteRoots.isEmpty()) {
+ return rootTasks;
+ }
+ for (Task<?> cteLeafTask : cteLeafs) {
+ for (Task<?> mainRootTask : rootTasks) {
+ cteLeafTask.addDependentTask(mainRootTask);
+ }
+ }
+ return cteRoots;
+ }
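+
+ // In effect, the materialized CTE stages are chained in execution order: the leaf tasks of
+ // one stage are made to gate the root tasks of the next, the final stage's leaves gate the
+ // main query's root tasks, and the earliest stages' roots are returned as the real roots.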
+
+ Table materializeCTE(String cteName, CTEClause cte) throws HiveException {
+
+ ASTNode createTable = new ASTNode(new ClassicToken(HiveParser.TOK_CREATETABLE));
+
+ ASTNode tableName = new ASTNode(new ClassicToken(HiveParser.TOK_TABNAME));
+ tableName.addChild(new ASTNode(new ClassicToken(HiveParser.Identifier, cteName)));
+
+ ASTNode temporary = new ASTNode(new ClassicToken(HiveParser.KW_TEMPORARY, MATERIALIZATION_MARKER));
+
+ createTable.addChild(tableName);
+ createTable.addChild(temporary);
+ createTable.addChild(cte.cteNode);
+
+ SemanticAnalyzer analyzer = new SemanticAnalyzer(queryState);
+ analyzer.initCtx(ctx);
+ analyzer.init(false);
+
+ // should share cte contexts
+ analyzer.aliasToCTEs.putAll(aliasToCTEs);
+
+ HiveOperation operation = queryState.getHiveOperation();
+ try {
+ analyzer.analyzeInternal(createTable);
+ } finally {
+ queryState.setCommandType(operation);
+ }
+
+ Table table = analyzer.tableDesc.toTable(conf);
+ Path location = table.getDataLocation();
+ try {
+ location.getFileSystem(conf).mkdirs(location);
+ } catch (IOException e) {
+ throw new HiveException(e);
+ }
+ table.setMaterializedTable(true);
+
+ LOG.info(cteName + " will be materialized into " + location);
+ cte.table = table;
+ cte.source = analyzer;
+
+ ctx.addMaterializedTable(cteName, table);
+
+ return table;
+ }
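+
+ // Conceptually, materializing a CTE named "c" behaves much like running
+ // "CREATE TEMPORARY TABLE c AS <cte query>" in the same session: the data is written under
+ // a temporary location and the table is registered with the Context so that later
+ // references to "c" resolve to the materialized result.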
+
+
+ static boolean isJoinToken(ASTNode node) {
+ if ((node.getToken().getType() == HiveParser.TOK_JOIN)
+ || (node.getToken().getType() == HiveParser.TOK_CROSSJOIN)
+ || isOuterJoinToken(node)
+ || (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN)
+ || (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN)) {
+ return true;
+ }
+
+ return false;
+ }
+
+ static private boolean isOuterJoinToken(ASTNode node) {
+ return (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN)
+ || (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN)
+ || (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN);
+ }
+
+ /**
+ * Given the AST with TOK_JOIN as the root, get all the aliases for the tables
+ * or subqueries in the join.
+ *
+ * @param qb
+ * @param join
+ * @throws SemanticException
+ */
+ @SuppressWarnings("nls")
+ private void processJoin(QB qb, ASTNode join) throws SemanticException {
+ int numChildren = join.getChildCount();
+ if ((numChildren != 2) && (numChildren != 3)
+ && join.getToken().getType() != HiveParser.TOK_UNIQUEJOIN) {
+ throw new SemanticException(generateErrorMessage(join,
+ "Join with multiple children"));
+ }
+
+ queryProperties.incrementJoinCount(isOuterJoinToken(join));
+ for (int num = 0; num < numChildren; num++) {
+ ASTNode child = (ASTNode) join.getChild(num);
+ if (child.getToken().getType() == HiveParser.TOK_TABREF) {
+ processTable(qb, child);
+ } else if (child.getToken().getType() == HiveParser.TOK_SUBQUERY) {
+ processSubQuery(qb, child);
+ } else if (child.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
+ queryProperties.setHasPTF(true);
+ processPTF(qb, child);
+ PTFInvocationSpec ptfInvocationSpec = qb.getPTFInvocationSpec(child);
+ String inputAlias = ptfInvocationSpec == null ? null :
+ ptfInvocationSpec.getFunction().getAlias();
+ if ( inputAlias == null ) {
+ throw new SemanticException(generateErrorMessage(child,
+ "PTF invocation in a Join must have an alias"));
+ }
+
+ } else if (child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
+ child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
+ // SELECT * FROM src1 LATERAL VIEW udtf() AS myTable JOIN src2 ...
+ // is not supported. Instead, the lateral view must be in a subquery
+ // SELECT * FROM (SELECT * FROM src1 LATERAL VIEW udtf() AS myTable) a
+ // JOIN src2 ...
+ throw new SemanticException(ErrorMsg.LATERAL_VIEW_WITH_JOIN
+ .getMsg(join));
+ } else if (isJoinToken(child)) {
+ processJoin(qb, child);
+ }
+ }
+ }
+
+ /**
+ * Given the AST with TOK_LATERAL_VIEW as the root, get the alias for the
+ * table or subquery in the lateral view and also make a mapping from the
+ * alias to all the lateral view AST's.
+ *
+ * @param qb
+ * @param lateralView
+ * @return the alias for the table/subquery
+ * @throws SemanticException
+ */
+
+ private String processLateralView(QB qb, ASTNode lateralView)
+ throws SemanticException {
+ int numChildren = lateralView.getChildCount();
+
+ assert (numChildren == 2);
+ ASTNode next = (ASTNode) lateralView.getChild(1);
+
+ String alias = null;
+
+ switch (next.getToken().getType()) {
+ case HiveParser.TOK_TABREF:
+ alias = processTable(qb, next);
+ break;
+ case HiveParser.TOK_SUBQUERY:
+ alias = processSubQuery(qb, next);
+ break;
+ case HiveParser.TOK_LATERAL_VIEW:
+ case HiveParser.TOK_LATERAL_VIEW_OUTER:
+ alias = processLateralView(qb, next);
+ break;
+ default:
+ throw new SemanticException(ErrorMsg.LATERAL_VIEW_INVALID_CHILD
+ .getMsg(lateralView));
+ }
+ alias = alias.toLowerCase();
+ qb.getParseInfo().addLateralViewForAlias(alias, lateralView);
+ qb.addAlias(alias);
+ return alias;
+ }
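+
+ // For example (illustrative names): in "... FROM src LATERAL VIEW explode(arr) t AS col",
+ // the alias returned here is "src", and the TOK_LATERAL_VIEW node is recorded against that
+ // alias in the QBParseInfo.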
+
+ /**
+ * Phase 1: (including, but not limited to):
+ *
+ * 1. Gets all the aliases for all the tables / subqueries and makes the
+ * appropriate mapping in aliasToTabs, aliasToSubq.
+ * 2. Gets the location of the destination and names the clause "insclause" + i.
+ * 3. Creates a map from a string representation of an aggregation tree to the
+ * actual aggregation AST.
+ * 4. Creates a mapping from the clause name to the select expression AST in
+ * destToSelExpr.
+ * 5. Creates a mapping from a table alias to the lateral view AST's in
+ * aliasToLateralViews.
+ *
+ * @param ast
+ * @param qb
+ * @param ctx_1
+ * @throws SemanticException
+ */
+ @SuppressWarnings({"fallthrough", "nls"})
+ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx)
+ throws SemanticException {
+
+ boolean phase1Result = true;
+ QBParseInfo qbp = qb.getParseInfo();
+ boolean skipRecursion = false;
+
+ if (ast.getToken() != null) {
+ skipRecursion = true;
+ switch (ast.getToken().getType()) {
+ case HiveParser.TOK_SELECTDI:
+ qb.countSelDi();
+ // fall through
+ case HiveParser.TOK_SELECT:
+ qb.countSel();
+ qbp.setSelExprForClause(ctx_1.dest, ast);
+
+ int posn = 0;
+ if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST) {
+ qbp.setHints((ASTNode) ast.getChild(0));
+ posn++;
+ }
+
+ if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM))
+ queryProperties.setUsesScript(true);
+
+ LinkedHashMap<String, ASTNode> aggregations = doPhase1GetAggregationsFromSelect(ast,
+ qb, ctx_1.dest);
+ doPhase1GetColumnAliasesFromSelect(ast, qbp);
+ qbp.setAggregationExprsForClause(ctx_1.dest, aggregations);
+ qbp.setDistinctFuncExprsForClause(ctx_1.dest,
+ doPhase1GetDistinctFuncExprs(aggregations));
+ break;
+
+ case HiveParser.TOK_WHERE:
+ qbp.setWhrExprForClause(ctx_1.dest, ast);
+ if (!SubQueryUtils.findSubQueries((ASTNode) ast.getChild(0)).isEmpty())
+ queryProperties.setFilterWithSubQuery(true);
+ break;
+
+ case HiveParser.TOK_INSERT_INTO:
+ String currentDatabase = SessionState.get().getCurrentDatabase();
+ String tab_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), currentDatabase);
+ qbp.addInsertIntoTable(tab_name, ast);
+
+ case HiveParser.TOK_DESTINATION:
+ ctx_1.dest = "insclause-" + ctx_1.nextNum;
+ ctx_1.nextNum++;
+ boolean isTmpFileDest = false;
+ if (ast.getChildCount() > 0 && ast.getChild(0) instanceof ASTNode) {
+ ASTNode ch = (ASTNode) ast.getChild(0);
+ if (ch.getToken().getType() == HiveParser.TOK_DIR && ch.getChildCount() > 0
+ && ch.getChild(0) instanceof ASTNode) {
+ ch = (ASTNode) ch.getChild(0);
+ isTmpFileDest = ch.getToken().getType() == HiveParser.TOK_TMP_FILE;
+ } else {
+ if (ast.getToken().getType() == HiveParser.TOK_DESTINATION
+ && ast.getChild(0).getType() == HiveParser.TOK_TAB) {
+ String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0),
+ SessionState.get().getCurrentDatabase());
+ qbp.getInsertOverwriteTables().put(fullTableName, ast);
+ }
+ }
+ }
+
+ // is there an insert in the subquery
+ if (qbp.getIsSubQ() && !isTmpFileDest) {
+ throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast));
+ }
+
+ if (plannerCtx != null) {
+ plannerCtx.setInsertToken(ast, isTmpFileDest);
+ }
+
+ qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
+ handleInsertStatementSpecPhase1(ast, qbp, ctx_1);
+ if (qbp.getClauseNamesForDest().size() > 1) {
+ queryProperties.setMultiDestQuery(true);
+ }
+ break;
+
+ case HiveParser.TOK_FROM:
+ int child_count = ast.getChildCount();
+ if (child_count != 1) {
+ throw new SemanticException(generateErrorMessage(ast,
+ "Multiple Children " + child_count));
+ }
+
+ // Check if this is a subquery / lateral view
+ ASTNode frm = (ASTNode) ast.getChild(0);
+ if (frm.getToken().getType() == HiveParser.TOK_TABREF) {
+ processTable(qb, frm);
+ } else if (frm.getToken().getType() == HiveParser.TOK_VIRTUAL_TABLE) {
+ // Create a temp table with the passed values in it then rewrite this portion of the
+ // tree to be from that table.
+ ASTNode newFrom = genValuesTempTable(frm, qb);
+ ast.setChild(0, newFrom);
+ processTable(qb, newFrom);
+ } else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) {
+ processSubQuery(qb, frm);
+ } else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
+ frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
+ queryProperties.setHasLateralViews(true);
+ processLateralView(qb, frm);
+ } else if (isJoinToken(frm)) {
+ processJoin(qb, frm);
+ qbp.setJoinExpr(frm);
+ }else if(frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION){
+ queryProperties.setHasPTF(true);
+ processPTF(qb, frm);
+ }
+ break;
+
+ case HiveParser.TOK_CLUSTERBY:
+ // Get the clusterby aliases - these are aliased to the entries in the
+ // select list
+ queryProperties.setHasClusterBy(true);
+ qbp.setClusterByExprForClause(ctx_1.dest, ast);
+ break;
+
+ case HiveParser.TOK_DISTRIBUTEBY:
+ // Get the distribute by aliases - these are aliased to the entries in
+ // the
+ // select list
+ queryProperties.setHasDistributeBy(true);
+ qbp.setDistributeByExprForClause(ctx_1.dest, ast);
+ if (qbp.getClusterByForClause(ctx_1.dest) != null) {
+ throw new SemanticException(generateErrorMessage(ast,
+ ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
+ } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
+ throw new SemanticException(generateErrorMessage(ast,
+ ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
+ }
+ break;
+
+ case HiveParser.TOK_SORTBY:
+ // Get the sort by aliases - these are aliased to the entries in the
+ // select list
+ queryProperties.setHasSortBy(true);
+ qbp.setSortByExprForClause(ctx_1.dest, ast);
+ if (qbp.getClusterByForClause(ctx_1.dest) != null) {
+ throw new SemanticException(generateErrorMessage(ast,
+ ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg()));
+ } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
+ throw new SemanticException(generateErrorMessage(ast,
+ ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg()));
+ }
+
+ break;
+
+ case HiveParser.TOK_ORDERBY:
+ // Get the order by aliases - these are aliased to the entries in the
+ // select list
+ queryProperties.setHasOrderBy(true);
+ qbp.setOrderByExprForClause(ctx_1.dest, ast);
+ if (qbp.getClusterByForClause(ctx_1.dest) != null) {
+ throw new SemanticException(generateErrorMessage(ast,
+ ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg()));
+ }
+ break;
+
+ case HiveParser.TOK_GROUPBY:
+ case HiveParser.TOK_ROLLUP_GROUPBY:
+ case HiveParser.TOK_CUBE_GROUPBY:
+ case HiveParser.TOK_GROUPING_SETS:
+ // Get the groupby aliases - these are aliased to the entries in the
+ // select list
+ queryProperties.setHasGroupBy(true);
+ if (qbp.getJoinExpr() != null) {
+ queryProperties.setHasJoinFollowedByGroupBy(true);
+ }
+ if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
+ throw new SemanticException(generateErrorMessage(ast,
+ ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg()));
+ }
+ qbp.setGroupByExprForClause(ctx_1.dest, ast);
+ skipRecursion = true;
+
+ // Rollup and Cubes are syntactic sugar on top of grouping sets
+ if (ast.getToken().getType() == HiveParser.TOK_ROLLUP_GROUPBY) {
+ qbp.getDestRollups().add(ctx_1.dest);
+ } else if (ast.getToken().getType() == HiveParser.TOK_CUBE_GROUPBY) {
+ qbp.getDestCubes().add(ctx_1.dest);
+ } else if (ast.getToken().getType() == HiveParser.TOK_GROUPING_SETS) {
+ qbp.getDestGroupingSets().add(ctx_1.dest);
+ }
+ break;
+
+ case HiveParser.TOK_HAVING:
+ qbp.setHavingExprForClause(ctx_1.dest, ast);
+ qbp.addAggregationExprsForClause(ctx_1.dest,
+ doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest));
+ break;
+
+ case HiveParser.KW_WINDOW:
+ if (!qb.hasWindowingSpec(ctx_1.dest) ) {
+ throw new SemanticException(generateErrorMessage(ast,
+ "Query has no Cluster/Distribute By; but has a Window definition"));
+ }
+ handleQueryWindowClauses(qb, ctx_1, ast);
+ break;
+
+ case HiveParser.TOK_LIMIT:
+ if (ast.getChildCount() == 2) {
+ qbp.setDestLimit(ctx_1.dest,
+ new Integer(ast.getChild(0).getText()),
+ new Integer(ast.getChild(1).getText()));
+ } else {
+ qbp.setDestLimit(ctx_1.dest, new Integer(0),
+ new Integer(ast.getChild(0).getText()));
+ }
+ break;
+
+ case HiveParser.TOK_ANALYZE:
+ // Case of analyze command
+
+ String table_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0)).toLowerCase();
+
+
+ qb.setTabAlias(table_name, table_name);
+ qb.addAlias(table_name);
+ qb.getParseInfo().setIsAnalyzeCommand(true);
+ qb.getParseInfo().setNoScanAnalyzeCommand(this.noscan);
+ qb.getParseInfo().setPartialScanAnalyzeCommand(this.partialscan);
+ // Allow analyze the whole table and dynamic partitions
+ HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
+ HiveConf.setVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");
+
+ break;
+
+ case HiveParser.TOK_UNIONALL:
+ if (!qbp.getIsSubQ()) {
+ // this shouldn't happen. The parser should have converted the union to be
+ // contained in a subquery. Just in case, we keep the error as a fallback.
+ throw new SemanticException(generateErrorMessage(ast,
+ ErrorMsg.UNION_NOTIN_SUBQ.getMsg()));
+ }
+ skipRecursion = false;
+ break;
+
+ case HiveParser.TOK_INSERT:
+ ASTNode destination = (ASTNode) ast.getChild(0);
+ Tree tab = destination.getChild(0);
+
+ // Proceed if AST contains partition & If Not Exists
+ if (destination.getChildCount() == 2 &&
+ tab.getChildCount() == 2 &&
+ destination.getChild(1).getType() == HiveParser.TOK_IFNOTEXISTS) {
+ String tableName = tab.getChild(0).getChild(0).getText();
+
+ Tree partitions = tab.getChild(1);
+ int childCount = partitions.getChildCount();
+ HashMap<String, String> partition = new HashMap<String, String>();
+ for (int i = 0; i < childCount; i++) {
+ String partitionName = partitions.getChild(i).getChild(0).getText();
+ Tree pvalue = partitions.getChild(i).getChild(1);
+ if (pvalue == null) {
+ break;
+ }
+ String partitionVal = stripQuotes(pvalue.getText());
+ partition.put(partitionName, partitionVal);
+ }
+ // if it is a dynamic partition throw the exception
+ if (childCount != partition.size()) {
+ throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS
+ .getMsg(partition.toString()));
+ }
+ Table table = null;
+ try {
+ table = this.getTableObjectByName(tableName);
+ } catch (HiveException ex) {
+ throw new SemanticException(ex);
+ }
+ try {
+ Partition parMetaData = db.getPartition(table, partition, false);
+ // Check whether the partition exists; if it does, skip the overwrite
+ if (parMetaData != null) {
+ phase1Result = false;
+ skipRecursion = true;
+ LOG.info("Partition already exists so insert into overwrite " +
+ "skipped for partition : " + parMetaData.toString());
+ break;
+ }
+ } catch (HiveException e) {
+ LOG.info("Error while getting metadata : ", e);
+ }
+ validatePartSpec(table, partition, (ASTNode)tab, conf, false);
+ }
+ skipRecursion = false;
+ break;
+ case HiveParser.TOK_LATERAL_VIEW:
+ case HiveParser.TOK_LATERAL_VIEW_OUTER:
+ // todo: nested LV
+ assert ast.getChildCount() == 1;
+ qb.getParseInfo().getDestToLateralView().put(ctx_1.dest, ast);
+ break;
+ case HiveParser.TOK_CTE:
+ processCTE(qb, ast);
+ break;
+ default:
+ skipRecursion = false;
+ break;
+ }
+ }
+
+ if (!skipRecursion) {
+ // Iterate over the rest of the children
+ int child_count = ast.getChildCount();
+ for (int child_pos = 0; child_pos < child_count && phase1Result; ++child_pos) {
+ // Recurse
+ phase1Result = phase1Result && doPhase1(
+ (ASTNode)ast.getChild(child_pos), qb, ctx_1, plannerCtx);
+ }
+ }
+ return phase1Result;
+ }
+
+ /**
+ * This is phase1 of supporting specifying schema in insert statement
+ * insert into foo(z,y) select a,b from bar;
+ * @see #handleInsertStatementSpec(java.util.List, String, RowResolver, RowResolver, QB, ASTNode)
+ * @throws SemanticException
+ */
+ private void handleInsertStatementSpecPhase1(ASTNode ast, QBParseInfo qbp, Phase1Ctx ctx_1) throws SemanticException {
+ ASTNode tabColName = (ASTNode)ast.getChild(1);
+ if(ast.getType() == HiveParser.TOK_INSERT_INTO && tabColName != null && tabColName.getType() == HiveParser.TOK_TABCOLNAME) {
+ //we have "insert into foo(a,b)..."; parser will enforce that 1+ columns are listed if TOK_TABCOLNAME is present
+ List<String> targetColNames = new ArrayList<String>();
+ for(Node col : tabColName.getChildren()) {
+ assert ((ASTNode)col).getType() == HiveParser.Identifier :
+ "expected token " + HiveParser.Identifier + " found " + ((ASTNode)col).getType();
+ targetColNames.add(((ASTNode)col).getText());
+ }
+ String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0),
+ SessionState.get().getCurrentDatabase());
+ qbp.setDestSchemaForClause(ctx_1.dest, targetColNames);
+ Set<String> targetColumns = new HashSet<String>();
+ targetColumns.addAll(targetColNames);
+ if(targetColNames.size() != targetColumns.size()) {
+ throw new SemanticException(generateErrorMessage(tabColName,
+ "Duplicate column name detected in " + fullTableName + " table schema specification"));
+ }
+ Table targetTable = null;
+ try {
+ targetTable = db.getTable(fullTableName, false);
+ }
+ catch (HiveException ex) {
+ LOG.error("Error processing HiveParser.TOK_DESTINATION: " + ex.getMessage(), ex);
+ throw new SemanticException(ex);
+ }
+ if(targetTable == null) {
+ throw new SemanticException(generateErrorMessage(ast,
+ "Unable to access metadata for table " + fullTableName));
+ }
+ for(FieldSchema f : targetTable.getCols()) {
+ //parser only allows foo(a,b), not foo(foo.a, foo.b)
+ targetColumns.remove(f.getName());
+ }
+ if(!targetColumns.isEmpty()) {//here we need to see if remaining columns are dynamic partition columns
+ /* We just checked the user specified schema columns among the regular table columns and found some which are not
+ 'regular'. Now check if they are dynamic partition columns.
+ For dynamic partitioning,
+ Given "create table multipart(a int, b int) partitioned by (c int, d int);"
+ for "insert into multipart partition(c='1',d)(d,a) values(2,3);" we expect parse tree to look like this
+ (TOK_INSERT_INTO
+ (TOK_TAB
+ (TOK_TABNAME multipart)
+ (TOK_PARTSPEC
+ (TOK_PARTVAL c '1')
+ (TOK_PARTVAL d)
+ )
+ )
+ (TOK_TABCOLNAME d a)
+ )*/
+ List<String> dynamicPartitionColumns = new ArrayList<String>();
+ if(ast.getChild(0) != null && ast.getChild(0).getType() == HiveParser.TOK_TAB) {
+ ASTNode tokTab = (ASTNode)ast.getChild(0);
+ ASTNode tokPartSpec = (ASTNode)tokTab.getFirstChildWithType(HiveParser.TOK_PARTSPEC);
+ if(tokPartSpec != null) {
+ for(Node n : tokPartSpec.getChildren()) {
+ ASTNode tokPartVal = null;
+ if(n instanceof ASTNode) {
+ tokPartVal = (ASTNode)n;
+ }
+ if(tokPartVal != null && tokPartVal.getType() == HiveParser.TOK_PARTVAL && tokPartVal.getChildCount() == 1) {
+ assert tokPartVal.getChild(0).getType() == HiveParser.Identifier :
+ "Expected column name; found tokType=" + tokPartVal.getType();
+ dynamicPartitionColumns.add(tokPartVal.getChild(0).getText());
+ }
+ }
+ }
+ }
+ for(String colName : dynamicPartitionColumns) {
+ targetColumns.remove(colName);
+ }
+ if(!targetColumns.isEmpty()) {
+ //Found some columns in user specified schema which are neither regular nor dynamic partition columns
+ throw new SemanticException(generateErrorMessage(tabColName,
+ "'" + (targetColumns.size() == 1 ? targetColumns.iterator().next() : targetColumns) +
+ "' in insert schema specification " + (targetColumns.size() == 1 ? "is" : "are") +
+ " not found among regular columns of " +
+ fullTableName + " nor dynamic partition columns."));
+ }
+ }
+ }
+ }
+
+ public void getMaterializationMetadata(QB qb) throws SemanticException {
+ try {
+ gatherCTEReferences(qb, rootClause);
+ int threshold = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_CTE_MATERIALIZE_THRESHOLD);
+ for (CTEClause cte : Sets.newHashSet(aliasToCTEs.values())) {
+ if (threshold >= 0 && cte.reference >= threshold) {
+ cte.materialize = true;
+ }
+ }
+ } catch (HiveException e) {
+ // Has to use full name to make sure it does not conflict with
+ // org.apache.commons.lang.StringUtils
+ LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+ if (e instanceof SemanticException) {
+ throw (SemanticException)e;
+ }
+ throw new SemanticException(e.getMessage(), e);
+ }
+ }
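+
+ // For example, with HiveConf.ConfVars.HIVE_CTE_MATERIALIZE_THRESHOLD set to 2, a CTE
+ // referenced at least twice is marked for materialization, a CTE referenced once is simply
+ // inlined as a subquery, and a negative threshold disables materialization altogether.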
+
+ private void gatherCTEReferences(QBExpr qbexpr, CTEClause parent) throws HiveException {
+ if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
+ gatherCTEReferences(qbexpr.getQB(), parent);
+ } else {
+ gatherCTEReferences(qbexpr.getQBExpr1(), parent);
+ gatherCTEReferences(qbexpr.getQBExpr2(), parent);
+ }
+ }
+
+ // TODO: check view references, too
+ private void gatherCTEReferences(QB qb, CTEClause current) throws HiveException {
+ for (String alias : qb.getTabAliases()) {
+ String tabName = qb.getTabNameForAlias(alias);
+ String cteName = tabName.toLowerCase();
+
+ CTEClause cte = findCTEFromName(qb, cteName);
+ if (cte != null) {
+ if (ctesExpanded.contains(cteName)) {
+ throw new SemanticException("Recursive cte " + cteName +
+ " detected (cycle: " + StringUtils.join(ctesExpanded, " -> ") +
+ " -> " + cteName + ").");
+ }
+ cte.reference++;
+ current.parents.add(cte);
+ if (cte.qbExpr != null) {
+ continue;
+ }
+ cte.qbExpr = new QBExpr(cteName);
+ doPhase1QBExpr(cte.cteNode, cte.qbExpr, qb.getId(), cteName);
+
+ ctesExpanded.add(cteName);
+ gatherCTEReferences(cte.qbExpr, cte);
+ ctesExpanded.remove(ctesExpanded.size() - 1);
+ }
+ }
+ for (String alias : qb.getSubqAliases()) {
+ gatherCTEReferences(qb.getSubqForAlias(alias), current);
+ }
+ }
+
+ public void getMetaData(QB qb) throws SemanticException {
+ getMetaData(qb, false);
+ }
+
+ public void getMetaData(QB qb, boolean enableMaterialization) throws SemanticException {
+ try {
+ if (enableMaterialization) {
+ getMaterializationMetadata(qb);
+ }
+ getMetaData(qb, null);
+ } catch (HiveException e) {
+ // Has to use full name to make sure it does not conflict with
+ // org.apache.commons.lang.StringUtils
+ LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+ if (e instanceof SemanticException) {
+ throw (SemanticException)e;
+ }
+ throw new SemanticException(e.getMessage(), e);
+ }
+ }
+
+ private void getMetaData(QBExpr qbexpr, ReadEntity parentInput)
+ throws HiveException {
+ if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
+ getMetaData(qbexpr.getQB(), parentInput);
+ } else {
+ getMetaData(qbexpr.getQBExpr1(), parentInput);
+ getMetaData(qbexpr.getQBExpr2(), parentInput);
+ }
+ }
+
+ @SuppressWarnings("nls")
+ private void getMetaData(QB qb, ReadEntity parentInput)
+ throws HiveException {
+ LOG.info("Get metadata for source tables");
+
+ // Go over the tables and populate the related structures.
+ // We have to materialize the table alias list since we might
+ // modify it in the middle for view rewrite.
+ List<String> tabAliases = new ArrayList<String>(qb.getTabAliases());
+
+ // Keep track of view alias to view name and read entity
+ // For eg: for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T
+ // keeps track of full view name and read entity corresponding to alias V3, V3:V2, V3:V2:V1.
+ // This is needed for tracking the dependencies for inputs, along with their parents.
+ Map<String, ObjectPair<String, ReadEntity>> aliasToViewInfo =
+ new HashMap<String, ObjectPair<String, ReadEntity>>();
+
+ /*
+ * used to capture view to SQ conversions. This is used to check for
+ * recursive CTE invocations.
+ */
+ Map<String, String> sqAliasToCTEName = new HashMap<String, String>();
+
+ for (String alias : tabAliases) {
+ String tabName = qb.getTabNameForAlias(alias);
+ String cteName = tabName.toLowerCase();
+
+ Table tab = db.getTable(tabName, false);
+ if (tab == null ||
+ tab.getDbName().equals(SessionState.get().getCurrentDatabase())) {
+ Table materializedTab = ctx.getMaterializedTable(cteName);
+ if (materializedTab == null) {
+ // we first look for this alias from CTE, and then from catalog.
+ CTEClause cte = findCTEFromName(qb, cteName);
+ if (cte != null) {
+ if (!cte.materialize) {
+ addCTEAsSubQuery(qb, cteName, alias);
+ sqAliasToCTEName.put(alias, cteName);
+ continue;
+ }
+ tab = materializeCTE(cteName, cte);
+ }
+ } else {
+ tab = materializedTab;
+ }
+ }
+
+ if (tab == null) {
+ ASTNode src = qb.getParseInfo().getSrcForAlias(alias);
+ if (null != src) {
+ throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(src));
+ } else {
+ throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(alias));
+ }
+ }
+
+ // Disallow INSERT INTO on bucketized tables
+ boolean isAcid = AcidUtils.isAcidTable(tab);
+ boolean isTableWrittenTo = qb.getParseInfo().isInsertIntoTable(tab.getDbName(), tab.getTableName());
+ if (isTableWrittenTo &&
+ tab.getNumBuckets() > 0 && !isAcid) {
+ throw new SemanticException(ErrorMsg.INSERT_INTO_BUCKETIZED_TABLE.
+ getMsg("Table: " + tabName));
+ }
+ // Disallow update and delete on non-acid tables
+ if ((updating() || deleting()) && !isAcid && isTableWrittenTo) {
+ //isTableWrittenTo: delete from acidTbl where a in (select id from nonAcidTable)
+ //so only assert this if we are actually writing to this table
+ // Whether we are using an acid compliant transaction manager has already been caught in
+ // UpdateDeleteSemanticAnalyzer, so if we are updating or deleting and getting nonAcid
+ // here, it means the table itself doesn't support it.
+ throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, tabName);
+ }
+
+ if (tab.isView()) {
+ if (qb.getParseInfo().isAnalyzeCommand()) {
+ throw new SemanticException(ErrorMsg.ANALYZE_VIEW.getMsg());
+ }
+ String fullViewName = tab.getDbName() + "." + tab.getTableName();
+ // Prevent view cycles
+ if (viewsExpanded.contains(fullViewName)) {
+ throw new SemanticException("Recursive view " + fullViewName +
+ " detected (cycle: " + StringUtils.join(viewsExpanded, " -> ") +
+ " -> " + fullViewName + ").");
+ }
+ replaceViewReferenceWithDefinition(qb, tab, tabName, alias);
+ // This is the last time we'll see the Table objects for views, so add it to the inputs
+ // now. isInsideView will tell if this view is embedded in another view.
+ ReadEntity viewInput = new ReadEntity(tab, parentInput, !qb.isInsideView());
+ viewInput = PlanUtils.addInput(inputs, viewInput);
+ aliasToViewInfo.put(alias, new ObjectPair<String, ReadEntity>(fullViewName, viewInput));
+ String aliasId = getAliasId(alias, qb);
+ if (aliasId != null) {
+ aliasId = aliasId.replace(SemanticAnalyzer.SUBQUERY_TAG_1, "")
+ .replace(SemanticAnalyzer.SUBQUERY_TAG_2, "");
+ }
+ viewAliasToInput.put(aliasId, viewInput);
+ continue;
+ }
+
+ if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) {
+ throw new SemanticException(generateErrorMessage(
+ qb.getParseInfo().getSrcForAlias(alias),
+ ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg()));
+ }
+
+ qb.getMetaData().setSrcForAlias(alias, tab);
+
+ if (qb.getParseInfo().isAnalyzeCommand()) {
+ // allow partial partition specification for nonscan since noscan is fast.
+ TableSpec ts = new TableSpec(db, conf, (ASTNode) ast.getChild(0), true, this.noscan);
+ if (ts.specType == SpecType.DYNAMIC_PARTITION) { // dynamic partitions
+ try {
+ ts.partitions = db.getPartitionsByNames(ts.tableHandle, ts.partSpec);
+ } catch (HiveException e) {
+ throw new SemanticException(generateErrorMessage(
+ qb.getParseInfo().getSrcForAlias(alias),
+ "Cannot get partitions for " + ts.partSpec), e);
+ }
+ }
+ // validate partial scan command
+ QBParseInfo qbpi = qb.getParseInfo();
+ if (qbpi.isPartialScanAnalyzeCommand()) {
+ Class<? extends InputFormat> inputFormatClass = null;
+ switch (ts.specType) {
+ case TABLE_ONLY:
+ case DYNAMIC_PARTITION:
+ inputFormatClass = ts.tableHandle.getInputFormatClass();
+ break;
+ case STATIC_PARTITION:
+ inputFormatClass = ts.partHandle.getInputFormatClass();
+ break;
+ default:
+ assert false;
+ }
+ // throw a HiveException for formats other than rcfile or orcfile.
+ if (!(inputFormatClass.equals(RCFileInputFormat.class) || inputFormatClass
+ .equals(OrcInputFormat.class))) {
+ throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_NON_RCFILE.getMsg());
+ }
+ }
+
+ tab.setTableSpec(ts);
+ qb.getParseInfo().addTableSpec(alias, ts);
+ }
+
+ ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
+ PlanUtils.addInput(inputs,
+ new ReadEntity(tab, parentViewInfo, parentViewInfo == null),mergeIsDirect);
+ }
+
+ LOG.info("Get metadata for subqueries");
+ // Go over the subqueries and getMetaData for these
+ for (String alias : qb.getSubqAliases()) {
+ boolean wasView = aliasToViewInfo.containsKey(alias);
+ boolean wasCTE = sqAliasToCTEName.containsKey(alias);
+ ReadEntity newParentInput = null;
+ if (wasView) {
+ viewsExpanded.add(aliasToViewInfo.get(alias).getFirst());
+ newParentInput = aliasToViewInfo.get(alias).getSecond();
+ } else if (wasCTE) {
+ ctesExpanded.add(sqAliasToCTEName.get(alias));
+ }
+ QBExpr qbexpr = qb.getSubqForAlias(alias);
+ getMetaData(qbexpr, newParentInput);
+ if (wasView) {
+ viewsExpanded.remove(viewsExpanded.size() - 1);
+ } else if (wasCTE) {
+ ctesExpanded.remove(ctesExpanded.size() - 1);
+ }
+ }
+
+ RowFormatParams rowFormatParams = new RowFormatParams();
+ StorageFormat storageFormat = new StorageFormat(conf);
+
+ LOG.info("Get metadata for destination tables");
+ // Go over all the destination structures and populate the related
+ // metadata
+ QBParseInfo qbp = qb.getParseInfo();
+
+ for (String name : qbp.getClauseNamesForDest()) {
+ ASTNode ast = qbp.getDestForClause(name);
+ switch (ast.getToken().getType()) {
+ case HiveParser.TOK_TAB: {
+ TableSpec ts = new TableSpec(db, conf, ast);
+ if (ts.tableHandle.isView()) {
+ throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
+ }
+
+ Class<?> outputFormatClass = ts.tableHandle.getOutputFormatClass();
+ if (!ts.tableHandle.isNonNative() &&
+ !HiveOutputFormat.class.isAssignableFrom(outputFormatClass)) {
+ throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE
+ .getMsg(ast, "The class is " + outputFormatClass.toString()));
+ }
+
+ // TableSpec ts is obtained from the query (user specified),
+ // which means the user didn't specify partitions in their query,
+ // but whether the table itself is partitioned is not known.
+ if (ts.specType != SpecType.STATIC_PARTITION) {
+ // This is a table or dynamic partition
+ qb.getMetaData().setDestForAlias(name, ts.tableHandle);
+ // has dynamic as well as static partitions
+ if (ts.partSpec != null && ts.partSpec.size() > 0) {
+ qb.getMetaData().setPartSpecForAlias(name, ts.partSpec);
+ }
+ } else {
+ // This is a partition
+ qb.getMetaData().setDestForAlias(name, ts.partHandle);
+ }
+ if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
+ // Add the table spec for the destination table.
+ qb.getParseInfo().addTableSpec(ts.tableName.toLowerCase(), ts);
+ }
+ break;
+ }
+
+ case HiveParser.TOK_DIR: {
+ // This is a dfs file
+ String fname = stripQuotes(ast.getChild(0).getText());
+ if ((!qb.getParseInfo().getIsSubQ())
+ && (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_TMP_FILE)) {
+
+ if (qb.isCTAS()) {
+ qb.setIsQuery(false);
+ ctx.setResDir(null);
+ ctx.setResFile(null);
+
+ // allocate a temporary output dir on the location of the table
+ String tableName = getUnescapedName((ASTNode) ast.getChild(0));
+ String[] names = Utilities.getDbTableName(tableName);
+ Path location;
+ try {
+ Warehouse wh = new Warehouse(conf);
+ //Use destination table's db location.
+ String destTableDb = qb.getTableDesc() != null? qb.getTableDesc().getDatabaseName(): null;
+ if (destTableDb == null) {
+ destTableDb = names[0];
+ }
+ location = wh.getDatabasePath(db.getDatabase(destTableDb));
+ } catch (MetaException e) {
+ throw new SemanticException(e);
+ }
+ try {
+ fname = ctx.getExtTmpPathRelTo(
+ FileUtils.makeQualified(location, conf)).toString();
+ } catch (Exception e) {
+ throw new SemanticException(generateErrorMessage(ast,
+ "Error creating temporary folder on: " + location.toString()), e);
+ }
+ if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
+ TableSpec ts = new TableSpec(db, conf, this.ast);
+ // Add the table spec for the destination table.
+ qb.getParseInfo().addTableSpec(ts.tableName.toLowerCase(), ts);
+ }
+ } else {
+ // This is the only place where isQuery is set to true; it defaults to false.
+ qb.setIsQuery(true);
+ Path stagingPath = getStagingDirectoryPathname(qb);
+ fname = stagingPath.toString();
+ ctx.setResDir(stagingPath);
+ }
+ }
+
+ boolean isDfsFile = true;
+ if (ast.getChildCount() >= 2 && ast.getChild(1).getText().toLowerCase().equals("local")) {
+ isDfsFile = false;
+ }
+ qb.getMetaData().setDestForAlias(name, fname, isDfsFile);
+
+ CreateTableDesc directoryDesc = new CreateTableDesc();
+ boolean directoryDescIsSet = false;
+ int numCh = ast.getChildCount();
+ for (int num = 1; num < numCh ; num++){
+ ASTNode child = (ASTNode) ast.getChild(num);
+ if (child != null) {
+ if (storageFormat.fillStorageFormat(child)) {
+ directoryDesc.setOutputFormat(storageFormat.getOutputFormat());
+ directoryDesc.setSerName(storageFormat.getSerde());
+ directoryDescIsSet = true;
+ continue;
+ }
+ switch (child.getToken().getType()) {
+ case HiveParser.TOK_TABLEROWFORMAT:
+ rowFormatParams.analyzeRowFormat(child);
+ directoryDesc.setFieldDelim(rowFormatParams.fieldDelim);
+ directoryDesc.setLineDelim(rowFormatParams.lineDelim);
+ directoryDesc.setCollItemDelim(rowFormatParams.collItemDelim);
+ directoryDesc.setMapKeyDelim(rowFormatParams.mapKeyDelim);
+ directoryDesc.setFieldEscape(rowFormatParams.fieldEscape);
+ directoryDesc.setNullFormat(rowFormatParams.nullFormat);
+ directoryDescIsSet=true;
+ break;
+ case HiveParser.TOK_TABLESERIALIZER:
+ ASTNode serdeChild = (ASTNode) child.getChild(0);
+ storageFormat.setSerde(unescapeSQLString(serdeChild.getChild(0).getText()));
+ directoryDesc.setSerName(storageFormat.getSerde());
+ if (serdeChild.getChildCount() > 1) {
+ directoryDesc.setSerdeProps(new HashMap<String, String>());
+ readProps((ASTNode) serdeChild.getChild(1).getChild(0), directoryDesc.getSerdeProps());
+ }
+ directoryDescIsSet = true;
+ break;
+ }
+ }
+ }
+ if (directoryDescIsSet){
+ qb.setDirectoryDesc(directoryDesc);
+ }
+ break;
+ }
+ default:
+ throw new SemanticException(generateErrorMessage(ast,
+ "Unknown Token Type " + ast.getToken().getType()));
+ }
+ }
+ }
+
+ /**
+ * Checks if a given path is encrypted (valid only for HDFS files)
+ * @param path The path to check for encryption
+ * @return True if the path is encrypted; False if it is not encrypted
+ * @throws HiveException If an error occurs while checking for encryption
+ */
+ private boolean isPathEncrypted(Path path) throws HiveException {
+
+ try {
+ HadoopShims.HdfsEncryptionShim hdfsEncryptionShim = SessionState.get().getHdfsEncryptionShim(path.getFileSystem(conf));
+ if (hdfsEncryptionShim != null) {
+ if (hdfsEncryptionShim.isPathEncrypted(path)) {
+ return true;
+ }
+ }
+ } catch (Exception e) {
+ throw new HiveException("Unable to determine if " + path + " is encrypted: " + e, e);
+ }
+
+ return false;
+ }
+
+ /**
+ * Compares two paths' key encryption strengths.
+ *
+ * @param p1 Path to an HDFS file system
+ * @param p2 Path to an HDFS file system
+ * @return -1 if p1's key is weaker than p2's; 0 if they are equal; 1 if p1's key is stronger
+ * @throws HiveException If an error occurs while comparing key strengths.
+ */
+ private int comparePathKeyStrength(Path p1, Path p2) throws HiveException {
+ HadoopShims.HdfsEncryptionShim hdfsEncryptionShim;
+
+ hdfsEncryptionShim = SessionState.get().getHdfsEncryptionShim();
+ if (hdfsEncryptionShim != null) {
+ try {
+ return hdfsEncryptionShim.comparePathKeyStrength(p1, p2);
+ } catch (Exception e) {
+ throw new HiveException("Unable to compare key strength for " + p1 + " and " + p2 + " : " + e, e);
+ }
+ }
+
+ return 0; // Non-encrypted path (or equal strength)
+ }
+
+ /**
+ * Checks if a given path has read-only access permissions.
+ *
+ * @param path The path to check for read-only permissions.
+ * @return True if the path is read-only; False otherwise.
+ * @throws HiveException If an error occurs while checking file permissions.
+ */
+ private boolean isPathReadOnly(Path path) throws HiveException {
+ HiveConf conf = SessionState.get().getConf();
+ try {
+ FileSystem fs = path.getFileSystem(conf);
+ UserGroupInformation ugi = Utils.getUGI();
+ FileStatus status = fs.getFileStatus(path);
+
+ // We just check for write permissions. If the check fails with an AccessControlException,
+ // then the location may be read-only.
+ FileUtils.checkFileAccessWithImpersonation(fs, status, FsAction.WRITE, ugi.getUserName());
+
+ // Path has writing permissions
+ return false;
+ } catch (AccessControlException e) {
+ // An AccessControlException may be raised for other reasons as well,
+ // but we treat it as if the path is read-only.
+ return true;
+ } catch (Exception e) {
+ throw new HiveException("Unable to determine if " + path + " is read only: " + e, e);
+ }
+ }
+
+ /**
+ * Gets the strongest encrypted table path.
+ *
+ * @param qb The QB object that contains a list of all table locations.
+ * @return The strongest encrypted path, or NULL if no tables are encrypted or none of them are HDFS tables.
+ * @throws HiveException if an error occurred attempting to compare the encryption strength
+ */
+ private Path getStrongestEncryptedTablePath(QB qb) throws HiveException {
+ List<String> tabAliases = new ArrayList<String>(qb.getTabAliases());
+ Path strongestPath = null;
+
+ /* Walk through all found table locations to get the most encrypted table */
+ for (String alias : tabAliases) {
+ Table tab = qb.getMetaData().getTableForAlias(alias);
+ if (tab != null) {
+ Path tablePath = tab.getDataLocation();
+ if (tablePath != null) {
+ if ("hdfs".equalsIgnoreCase(tablePath.toUri().getScheme())) {
+ if (isPathEncrypted(tablePath)) {
+ if (strongestPath == null) {
+ strongestPath = tablePath;
+ } else if (comparePathKeyStrength(tablePath, strongestPath) > 0) {
+ strongestPath = tablePath;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return strongestPath;
+ }
+
+ /**
+ * Gets the staging directory where MR files will be stored temporarily.
+ * It walks through the QB plan to find the correct location to save temporary files. For
+ * security reasons, this temporary location (or staging directory) may be created inside
+ * the locations of encrypted tables. If the QB has read-only tables, then the old scratch
+ * directory will be used, or a permission error will be thrown if the requested query table
+ * is encrypted and the old scratch directory is not.
+ *
+ * @param qb The QB object that contains a list of all table locations.
+ * @return The path to the staging directory.
+ * @throws HiveException If an error occurs while identifying the correct staging location.
+ */
+ private Path getStagingDirectoryPathname(QB qb) throws HiveException {
+ Path stagingPath = null, tablePath;
+
+ // Look for the most strongly encrypted table location.
+ // It may be null if no tables are encrypted, or if none of them are on HDFS.
+ tablePath = getStrongestEncryptedTablePath(qb);
+ if (tablePath != null) {
+ // At this point, tablePath is part of HDFS and it is encrypted
+ if (isPathReadOnly(tablePath)) {
+ Path tmpPath = ctx.getMRTmpPath();
+ if (comparePathKeyStrength(tablePath, tmpPath) < 0) {
+ throw new HiveException("Read-only encrypted tables cannot be read " +
+ "if the scratch directory is not encrypted (or encryption is weak)");
+ } else {
+ stagingPath = tmpPath;
+ }
+ }
+
+ if (stagingPath == null) {
+ stagingPath = ctx.getMRTmpPath(tablePath.toUri());
+ }
+ } else {
+ stagingPath = ctx.getMRTmpPath();
+ }
+
+ return stagingPath;
+ }
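+
+ // Decision summary: when the query reads an encrypted HDFS table, the staging directory is
+ // placed under that table's location; if that table is read-only, the regular MR scratch
+ // directory is used instead, provided it passes the key-strength comparison above,
+ // otherwise a HiveException is raised. With no encrypted tables, the default MR scratch
+ // directory is used.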
+
+ private void replaceViewReferenceWithDefinition(QB qb, Table tab,
+ String tab_name, String alias) throws SemanticException {
+
+ ParseDriver pd = new ParseDriver();
+ ASTNode viewTree;
+ final ASTNodeOrigin viewOrigin = new ASTNodeOrigin("VIEW", tab.getTableName(),
+ tab.getViewExpandedText(), alias, qb.getParseInfo().getSrcForAlias(
+ alias));
+ try {
+ String viewText = tab.getViewExpandedText();
+ // Reparse text, passing null for context to avoid clobbering
+ // the top-level token stream.
+ ASTNode tree = pd.parse(viewText, ctx, false);
+ tree = ParseUtils.findRootNonNullToken(tree);
+ viewTree = tree;
+ Dispatcher nodeOriginDispatcher = new Dispatcher() {
+ @Override
+ public Object dispatch(Node nd, java.util.Stack<Node> stack,
+ Object... nodeOutputs) {
+ ((ASTNode) nd).setOrigin(viewOrigin);
+ return null;
+ }
+ };
+ GraphWalker nodeOriginTagger = new DefaultGraphWalker(
+ nodeOriginDispatcher);
+ nodeOriginTagger.startWalking(java.util.Collections
+ .<Node> singleton(viewTree), null);
+ } catch (ParseException e) {
+ // A user could encounter this if a stored view definition contains
+ // an old SQL construct which has been eliminated in a later Hive
+ // version, so we need to provide full debugging info to help
+ // with fixing the view definition.
+ LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+ StringBuilder sb = new StringBuilder();
+ sb.append(e.getMessage());
+ ErrorMsg.renderOrigin(sb, viewOrigin);
+ throw new SemanticException(sb.toString(), e);
+ }
+ QBExpr qbexpr = new QBExpr(alias);
+ doPhase1QBExpr(viewTree, qbexpr, qb.getId(), alias, true);
+ // if skip authorization, skip checking;
+ // if it is inside a view, skip checking;
+ // if authorization flag is not enabled, skip checking.
+ // if HIVE_STATS_COLLECT_SCANCOLS is enabled, check.
+ if ((!this.skipAuthorization() && !qb.isInsideView() && HiveConf.getBoolVar(conf,
+ HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED))
+ || HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
+ qb.rewriteViewToSubq(alias, tab_name, qbexpr, tab);
+ } else {
+ qb.rewriteViewToSubq(alias, tab_name, qbexpr, null);
+ }
+ }
+
+ private boolean isPresent(String[] list, String elem) {
+ for (String s : list) {
+ if (s.toLowerCase().equals(elem)) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ /*
+ * This method is invoked for unqualified column references in join conditions.
+ * It is passed the Alias to Operator mapping built for the QueryBlock so far.
+ * We try to resolve the unqualified column against each of the Operator Row Resolvers.
+ * - if the column is present in only one RowResolver, we treat this as a reference to
+ * that Operator.
+ * - if the column resolves with more than one RowResolver, we treat it as an Ambiguous
+ * reference.
+ * - if the column doesn't resolve with any RowResolver, we treat this as an Invalid
+ * reference.
+ */
+ @SuppressWarnings("rawtypes")
+ private String findAlias(ASTNode columnRef,
+ Map<String, Operator> aliasToOpInfo) throws SemanticException {
+ String colName = unescapeIdentifier(columnRef.getChild(0).getText()
+ .toLowerCase());
+ String tabAlias = null;
+ if ( aliasToOpInfo != null ) {
+ for (Map.Entry<String, Operator> opEntry : aliasToOpInfo.entrySet()) {
+ Operator op = opEntry.getValue();
+ RowResolver rr = opParseCtx.get(op).getRowResolver();
+ ColumnInfo colInfo = rr.get(null, colName);
+ if (colInfo != null) {
+ if (tabAlias == null) {
+ tabAlias = opEntry.getKey();
+ } else {
+ throw new SemanticException(
+ ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(columnRef.getChild(0)));
+ }
+ }
+ }
+ }
+ if ( tabAlias == null ) {
+ throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(columnRef
+ .getChild(0)));
+ }
+ return tabAlias;
+ }
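+
+ // For example (illustrative names): in "a JOIN b ON key = b.id", the unqualified "key" is
+ // resolved by probing the row resolvers of the operators for "a" and "b"; if only "a"
+ // exposes a column named "key", the reference is treated as a.key; if both do, it is
+ // reported as an ambiguous table alias; if neither does, it is an invalid table alias error.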
+
+ @SuppressWarnings("nls")
+ void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn,
+ ArrayList<String> leftAliases, ArrayList<String> rightAliases,
+ ArrayList<String> fields,
+ Map<String, Operator> aliasToOpInfo) throws SemanticException {
+ // String[] allAliases = joinTree.getAllAliases();
+ switch (condn.getToken().getType()) {
+ case HiveParser.TOK_TABLE_OR_COL:
+ String tableOrCol = unescapeIdentifier(condn.getChild(0).getText()
+ .toLowerCase());
+ unparseTranslator.addIdentifierTranslation((ASTNode) condn.getChild(0));
+ if (isPresent(joinTree.getLeftAliases(), tableOrCol)) {
+ if (!leftAliases.contains(tableOrCol)) {
+ leftAliases.add(tableOrCol);
+ }
+ } else if (isPresent(joinTree.getRightAliases(), tableOrCol)) {
+ if (!rightAliases.contains(tableOrCol)) {
+ rightAliases.add(tableOrCol);
+ }
+ } else {
+ tableOrCol = findAlias(condn, aliasToOpInfo);
+ if (isPresent(joinTree.getLeftAliases(), tableOrCol)) {
+ if (!leftAliases.contains(tableOrCol)) {
+ leftAliases.add(tableOrCol);
+ }
+ } else {
+ if (!rightAliases.contains(tableOrCol)) {
+ rightAliases.add(tableOrCol);
+ }
+ if (joinTree.getNoSemiJoin() == false) {
+ // if this is a semijoin, we need to add the condition
+ joinTree.addRHSSemijoinColumns(tableOrCol, condn);
+ }
+ }
+ }
+ break;
+
+ case HiveParser.Identifier:
+ // it may be a field name, return the identifier and let the caller decide
+ // whether it is or not
+ if (fields != null) {
+ fields
+ .add(unescapeIdentifier(condn.getToken().getText().toLowerCase()));
+ }
+ unparseTranslator.addIdentifierTranslation(condn);
+ break;
+ case HiveParser.Number:
+ case HiveParser.StringLiteral:
+ case HiveParser.IntegralLiteral:
+ case HiveParser.NumberLiteral:
+ case HiveParser.TOK_STRINGLITERALSEQUENCE:
+ case HiveParser.TOK_CHARSETLITERAL:
+ case HiveParser.KW_TRUE:
+ case HiveParser.KW_FALSE:
+ break;
+
+ case HiveParser.TOK_FUNCTION:
+ // check all the arguments
+ for (int i = 1; i < condn.getChildCount(); i++) {
+ parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(i),
+ leftAliases, rightAliases, null, aliasToOpInfo);
+ }
+ break;
+
+ default:
+ // This is an operator - so check whether it is unary or binary operator
+ if (condn.getChildCount() == 1) {
+ parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
+ leftAliases, rightAliases, null, aliasToOpInfo);
+ } else if (condn.getChildCount() == 2) {
+
+ ArrayList<String> fields1 = null;
+ // if it is a dot operator, remember the field name of the rhs of the
+ // left semijoin
+ if (joinTree.getNoSemiJoin() == false
+ && condn.getToken().getType() == HiveParser.DOT) {
+ // get the semijoin rhs table name and field name
+ fields1 = new ArrayList<String>();
+ int rhssize = rightAliases.size();
+ parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
+ leftAliases, rightAliases, null, aliasToOpInfo);
+ String rhsAlias = null;
+
+ if (rightAliases.size() > rhssize) { // the new table is rhs table
+ rhsAlias = rightAliases.get(rightAliases.size() - 1);
+ }
+
+ parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
+ leftAliases, rightAliases, fields1, aliasToOpInfo);
+ if (rhsAlias != null && fields1.size() > 0) {
+ joinTree.addRHSSemijoinColumns(rhsAlias, condn);
+ }
+ } else {
+ parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
+ leftAliases, rightAliases, null, aliasToOpInfo);
+ parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
+ leftAliases, rightAliases, fields1, aliasToOpInfo);
+ }
+ } else {
+ throw new SemanticException(condn.toStringTree() + " encountered with "
+ + condn.getChildCount() + " children");
+ }
+ break;
+ }
+ }
+
+ private void populateAliases(List<String> leftAliases,
+ List<String> rightAliases, ASTNode condn, QBJoinTree joinTree,
+ List<String> leftSrc) throws SemanticException {
+ if ((leftAliases.size() != 0) && (rightAliases.size() != 0)) {
+ throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1
+ .getMsg(condn));
+ }
+
+ if (rightAliases.size() != 0) {
+ assert rightAliases.size() == 1;
+ joinTree.getExpressions().get(1).add(condn);
+ } else if (leftAliases.size() != 0) {
+ joinTree.getExpressions().get(0).add(condn);
+ for (String s : leftAliases) {
+ if (!leftSrc.contains(s)) {
+ leftSrc.add(s);
+ }
+ }
+ } else {
+ throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_2
+ .getMsg(condn));
+ }
+ }
+
+ /*
+ * refactored out of the Equality case of parseJoinCondition
+ * so that this can be recursively called on its left tree in the case when
+ * only left sources are referenced in a Predicate
+ */
+ void applyEqualityPredicateToQBJoinTree(QBJoinTree joinTree,
+ JoinType type,
+ List<String> leftSrc,
+ ASTNode joinCond,
+ ASTNode leftCondn,
+ ASTNode rightCondn,
+ List<String> leftCondAl1,
+ List<String> leftCondAl2,
+ List<String> rightCondAl1,
+ List<String> rightCondAl2) throws SemanticException {
+ if (leftCondAl1.size() != 0) {
+ if ((rightCondAl1.size() != 0)
+ || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
+ if (type.equals(JoinType.LEFTOUTER) ||
+ type.equals(JoinType.FULLOUTER)) {
+ joinTree.getFilters().get(0).add(joinCond);
+ } else {
+ /*
+ * If the rhs references table sources and this QBJoinTree has a leftTree;
+ * hand it to the leftTree and let it recursively handle it.
+ * There are 5 cases of passing a condition down:
+ * 1. The leftSide && rightSide don't contain references to the leftTree's rightAlias
+ * => pass the lists down as is.
+ * 2. The leftSide contains refs to the leftTree's rightAlias, the rightSide doesn't
+ * => switch the leftCondAl1 and leftCondAl2 lists and pass down.
+ * 3. The rightSide contains refs to the leftTree's rightAlias, the leftSide doesn't
+ * => switch the rightCondAl1 and rightCondAl2 lists and pass down.
+ * 4. In case both contain references to the leftTree's rightAlias
+ * => we cannot push the condition down.
+ * 5. If either contain references to both left & right
+ * => we cannot push forward.
+ */
+ if (rightCondAl1.size() != 0) {
+ QBJoinTree leftTree = joinTree.getJoinSrc();
+ List<String> leftTreeLeftSrc = new ArrayList<String>();
+ if (leftTree != null && leftTree.getNoOuterJoin()) {
+ String leftTreeRightSource = leftTree.getRightAliases() != null &&
+ leftTree.getRightAliases().length > 0 ?
+ leftTree.getRightAliases()[0] : null;
+
+ boolean leftHasRightReference = false;
+ for (String r : leftCondAl1) {
+ if (r.equals(leftTreeRightSource)) {
+ leftHasRightReference = true;
+ break;
+ }
+ }
+ boolean rightHasRightReference = false;
+ for (String r : rightCondAl1) {
+ if (r.equals(leftTreeRightSource)) {
+ rightHasRightReference = true;
+ break;
+ }
+ }
+
+ boolean pushedDown = false;
+ if ( !leftHasRightReference && !rightHasRightReference ) {
+ applyEqualityPredicateToQBJoinTree(leftTree, type, leftTreeLeftSrc,
+ joinCond, leftCondn, rightCondn,
+ leftCondAl1, leftCondAl2,
+ rightCondAl1, rightCondAl2);
+ pushedDown = true;
+ } else if ( !leftHasRightReference && rightHasRightReference && rightCondAl1.size() == 1 ) {
+ applyEqualityPredicateToQBJoinTree(leftTree, type, leftTreeLeftSrc,
+ joinCond, leftCondn, rightCondn,
+ leftCondAl1, leftCondAl2,
+ rightCondAl2, rightCondAl1);
+ pushedDown = true;
+ } else if (leftHasRightReference && !rightHasRightReference && leftCondAl1.size() == 1 ) {
+ applyEqualityPredicateToQBJoinTree(leftTree, type, leftTreeLeftSrc,
+ joinCond, leftCondn, rightCondn,
+ leftCondAl2, leftCondAl1,
+ rightCondAl1, rightCondAl2);
+ pushedDown = true;
+ }
+
+ if (leftTreeLeftSrc.size() == 1) {
+ leftTree.setLeftAlias(leftTreeLeftSrc.get(0));
+ }
+ if ( pushedDown) {
+ return;
+ }
+ } // leftTree != null
+ }
+ joinTree.getFiltersForPushing().get(0).add(joinCond);
+ }
+ } else if (rightCondAl2.size() != 0) {
+ populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree,
+ leftSrc);
+ populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree,
+ leftSrc);
+ boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS;
+ joinTree.getNullSafes().add(nullsafe);
+ }
+ } else if (leftCondAl2.size() != 0) {
+ if ((rightCondAl2.size() != 0)
+ || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
+ if (type.equals(JoinType.RIGHTOUTER)
+ || type.equals(JoinType.FULLOUTER)) {
+ joinTree.getFilters().get(1).add(joinCond);
+ } else {
+ joinTree.getFiltersForPushing().get(1).add(joinCond);
+ }
+ } else if (rightCondAl1.size() != 0) {
+ populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree,
+ leftSrc);
+ populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree,
+ leftSrc);
+ boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS;
+ joinTree.getNullSafes().add(nullsafe);
+ }
+ } else if (rightCondAl1.size() != 0) {
+ if (type.equals(JoinType.LEFTOUTER)
+ || type.equals(JoinType.FULLOUTER)) {
+ joinTree.getFilters().get(0).add(joinCond);
+ } else {
+ joinTree.getFiltersForPushing().get(0).add(joinCond);
+ }
+ } else {
+ if (type.equals(JoinType.RIGHTOUTER)
+ || type.equals(JoinType.FULLOUTER)) {
+ joinTree.getFilters().get(1).add(joinCond);
+ } else if (type.equals(JoinType.LEFTSEMI)) {
+ joinTree.getExpressions().get(0).add(leftCondn);
+ joinTree.getExpressions().get(1).add(rightCondn);
+ boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS;
+ joinTree.getNullSafes().add(nullsafe);
+ joinTree.getFiltersForPushing().get(1).add(joinCond);
+ } else {
+ joinTree.getFiltersForPushing().get(1).add(joinCond);
+ }
+ }
+
+ }
+
+ @SuppressWarnings("rawtypes")
+ private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond, List<String> leftSrc,
+ Map<String, Operator> aliasToOpInfo)
+ throws SemanticException {
+ if (joinCond == null) {
+ return;
+ }
+ JoinCond cond = joinTree.getJoinCond()[0];
+
+ JoinType type = cond.getJoinType();
+ parseJoinCondition(joinTree, joinCond, leftSrc, type, aliasToOpInfo);
+
+ List<ArrayList<ASTNode>> filters = joinTree.getFilters();
+ if (type == JoinType.LEFTOUTER || type == JoinType.FULLOUTER) {
+ joinTree.addFilterMapping(cond.getLeft(), cond.getRight(), filters.get(0).size());
+ }
+ if (type == JoinType.RIGHTOUTER || type == JoinType.FULLOUTER) {
+ joinTree.addFilterMapping(cond.getRight(), cond.getLeft(), filters.get(1).size());
+ }
+ }
+
+ /**
+ * Parse the join condition. If the condition is a join condition, throw an
+ * error if it is not an equality. Otherwise, break it into left and right
+ * expressions and store them in the join tree. If the condition is a join filter,
+ * add it to the filter list of the join tree. The join condition can contain
+ * conditions on both the left and right trees as well as filters on either.
+ * Currently, we only support equi-joins, so we throw an error if the
+ * condition involves both subtrees and is not an equality. Also, we only
+ * support AND; ORs are not supported currently as their semantics are not
+ * very clear, they may lead to data explosion, and there is no use case.
+ *
+ * @param joinTree
+ * jointree to be populated
+ * @param joinCond
+ * join condition
+ * @param leftSrc
+ * left sources
+ * @throws SemanticException
+ */
+ @SuppressWarnings("rawtypes")
+ private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond,
+ List<String> leftSrc, JoinType type,
+ Map<String, Operator> aliasToOpInfo) throws SemanticException {
+ if (joinCond == null) {
+ return;
+ }
+
+ switch (joinCond.getToken().getType()) {
+ case HiveParser.KW_OR:
+ throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_3
+ .getMsg(joinCond));
+
+ case HiveParser.KW_AND:
+ parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(0), leftSrc, type, aliasToOpInfo);
+ parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(1), leftSrc, type, aliasToOpInfo);
+ break;
+
+ case HiveParser.EQUAL_NS:
+ case HiveParser.EQUAL:
+ ASTNode leftCondn = (ASTNode) joinCond.getChild(0);
+ ArrayList<String> leftCondAl1 = new ArrayList<String>();
+ ArrayList<String> leftCondAl2 = new ArrayList<String>();
+ parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2,
+ null, aliasToOpInfo);
+
+ ASTNode rightCondn = (ASTNode) joinCond.getChild(1);
+ ArrayList<String> rightCondAl1 = new ArrayList<String>();
+ ArrayList<String> rightCondAl2 = new ArrayList<String>();
+ parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1,
+ rightCondAl2, null, aliasToOpInfo);
+
+ // is it a filter or a join condition
+ // if it is filter see if it can be pushed above the join
+ // filter cannot be pushed if
+ // * join is full outer or
+ // * join is left outer and filter is on left alias or
+ // * join is right outer and filter is on right alias
+ if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0))
+ || ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) {
+ throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1
+ .getMsg(joinCond));
+ }
+
+ applyEqualityPredicateToQBJoinTree(joinTree, type, leftSrc,
+ joinCond, leftCondn, rightCondn,
+ leftCondAl1, leftCondAl2,
+ rightCondAl1, rightCondAl2);
+
+ break;
+
+ default:
+ boolean isFunction = (joinCond.getType() == HiveParser.TOK_FUNCTION);
+
+ // Create all children
+ int childrenBegin = (isFunction ? 1 : 0);
+ ArrayList<ArrayList<String>> leftAlias = new ArrayList<ArrayList<String>>(
+ joinCond.getChildCount() - childrenBegin);
+ ArrayList<ArrayList<String>> rightAlias = new ArrayList<ArrayList<String>>(
+ joinCond.getChildCount() - childrenBegin);
+ for (int ci = 0; ci < joinCond.getChildCount() - childrenBegin; ci++) {
+ ArrayList<String> left = new ArrayList<String>();
+ ArrayList<String> right = new ArrayList<String>();
+ leftAlias.add(left);
+ rightAlias.add(right);
+ }
+
+ for (int ci = childrenBegin; ci < joinCond.getChildCount(); ci++) {
+ parseJoinCondPopulateAlias(joinTree, (ASTNode) joinCond.getChild(ci),
+ leftAlias.get(ci - childrenBegin), rightAlias.get(ci
+ - childrenBegin), null, aliasToOpInfo);
+ }
+
+ boolean leftAliasNull = true;
+ for (ArrayList<String> left : leftAlias) {
+ if (left.size() != 0) {
+ leftAliasNull = false;
+ break;
+ }
+ }
+
+ boolean rightAliasNull = true;
+ for (ArrayList<String> right : rightAlias) {
+ if (right.size() != 0) {
+ rightAliasNull = false;
+ break;
+ }
+ }
+
+ if (!leftAliasNull && !rightAliasNull) {
+ throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1
+ .getMsg(joinCond));
+ }
+
+ if (!leftAliasNull) {
+ if (type.equals(JoinType.LEFTOUTER)
+ || type.equals(JoinType.FULLOUTER)) {
+ joinTree.getFilters().get(0).add(joinCond);
+ } else {
+ joinTree.getFiltersForPushing().get(0).add(joinCond);
+ }
+ } else {
+ if (type.equals(JoinType.RIGHTOUTER)
+ || type.equals(JoinType.FULLOUTER)) {
+ joinTree.getFilters().get(1).add(joinCond);
+ } else {
+ joinTree.getFiltersForPushing().get(1).add(joinCond);
+ }
+ }
+
+ break;
+ }
+ }
+
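+ /*
+ * A minimal sketch (hypothetical helper, not referenced by the analyzer) of
+ * the classification rule applied above, written against plain booleans
+ * rather than the alias lists populated by parseJoinCondPopulateAlias: an
+ * operand that references both sides is rejected, operands from opposite
+ * sides form a join condition, and a predicate confined to one side is kept
+ * as a filter for that side.
+ */
+ private static String classifyPredicateSketch(boolean leftOpRefsLeftSide, boolean leftOpRefsRightSide,
+ boolean rightOpRefsLeftSide, boolean rightOpRefsRightSide) {
+ if ((leftOpRefsLeftSide && leftOpRefsRightSide) || (rightOpRefsLeftSide && rightOpRefsRightSide)) {
+ return "invalid"; // one operand mixes both sides: not a supported equi-join condition
+ }
+ if ((leftOpRefsLeftSide && rightOpRefsRightSide) || (leftOpRefsRightSide && rightOpRefsLeftSide)) {
+ return "join condition"; // operands come from opposite sides: becomes a join key pair
+ }
+ return "one-sided filter"; // both operands on the same side (or constant): a filter for that side
+ }
+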
+ @SuppressWarnings("rawtypes")
+ private void extractJoinCondsFromWhereClause(QBJoinTree joinTree, QB qb, String dest, ASTNode predicate,
+ Map<String, Operator> aliasToOpInfo) throws SemanticException {
+
+ switch (predicate.getType()) {
+ case HiveParser.KW_AND:
+ extractJoinCondsFromWhereClause(joinTree, qb, dest,
+ (ASTNode) predicate.getChild(0), aliasToOpInfo);
+ extractJoinCondsFromWhereClause(joinTree, qb, dest,
+ (ASTNode) predicate.getChild(1), aliasToOpInfo);
+ break;
+ case HiveParser.EQUAL_NS:
+ case HiveParser.EQUAL:
+
+ ASTNode leftCondn = (ASTNode) predicate.getChild(0);
+ ArrayList leftCondAl1 = new ArrayList();
+ ArrayList leftCondAl2 = new ArrayList();
+ try {
+ parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2,
+ null, aliasToOpInfo);
+ } catch(SemanticException se) {
+ // suppress the exception here; if it is a real issue, it will be caught again during where-clause handling.
+ return;
+ }
+
+ ASTNode rightCondn = (ASTNode) predicate.getChild(1);
+ ArrayList rightCondAl1 = new ArrayList();
+ ArrayList rightCondAl2 = new ArrayList();
+ try {
+ parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1,
+ rightCondAl2, null, aliasToOpInfo);
+ } catch(SemanticException se) {
+ // suppress the exception here; if it is a real issue, it will be caught again during where-clause handling.
+ return;
+ }
+
+ if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0))
+ || ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) {
+ // this is not a join condition.
+ return;
+ }
+
+ if (((leftCondAl1.size() == 0) && (leftCondAl2.size() == 0))
+ || ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
+ // this is not a join condition. Will get handled by predicate pushdown.
+ return;
+ }
+
+ List<String> leftSrc = new ArrayList<String>();
+ JoinCond cond = joinTree.getJoinCond()[0];
+ JoinType type = cond.getJoinType();
+ applyEqualityPredicateToQBJoinTree(joinTree, type, leftSrc,
+ predicate, leftCondn, rightCondn,
+ leftCondAl1, leftCondAl2,
+ rightCondAl1, rightCondAl2);
+ if (leftSrc.size() == 1) {
+ joinTree.setLeftAlias(leftSrc.get(0));
+ }
+
+ // todo: hold onto this predicate, so that we don't add it to the Filter Operator.
+
+ break;
+ default:
+ return;
+ }
+ }
+
+ @SuppressWarnings("nls")
+ public Operator putOpInsertMap(Operator op,
+ RowResolver rr) {
+ OpParseContext ctx = new OpParseContext(rr);
+ opParseCtx.put(op, ctx);
+ op.augmentPlan();
+ return op;
+ }
+
+ @SuppressWarnings("nls")
+ private Operator genHavingPlan(String dest, QB qb, Operator input,
+ Map<String, Operator> aliasToOpInfo)
+ throws SemanticException {
+
+ ASTNode havingExpr = qb.getParseInfo().getHavingForClause(dest);
+
+ OpParseContext inputCtx = opParseCtx.get(input);
+ RowResolver inputRR = inputCtx.getRowResolver();
+ Map<ASTNode, String> exprToColumnAlias = qb.getParseInfo().getAllExprToColumnAlias();
+ for (ASTNode astNode : exprToColumnAlias.keySet()) {
+ if (inputRR.getExpression(astNode) != null) {
+ inputRR.put("", exprToColumnAlias.get(astNode), inputRR.getExpression(astNode));
+ }
+ }
+ ASTNode condn = (ASTNode) havingExpr.getChild(0);
+
+ /*
+ * Now a having clause can contain a SubQuery predicate;
+ * so we invoke genFilterPlan to handle SubQuery algebraic transformation,
+ * just as is done for SubQuery predicates appearing in the Where Clause.
+ */
+ Operator output = genFilterPlan(condn, qb, input, aliasToOpInfo, true, false);
+ output = putOpInsertMap(output, inputRR);
+ return output;
+ }
+
+ private Operator genPlanForSubQueryPredicate(
+ QB qbSQ,
+ ISubQueryJoinInfo subQueryPredicate) throws SemanticException {
+ qbSQ.setSubQueryDef(subQueryPredicate.getSubQuery());
+ Phase1Ctx ctx_1 = initPhase1Ctx();
+ doPhase1(subQueryPredicate.getSubQueryAST(), qbSQ, ctx_1, null);
+ getMetaData(qbSQ);
+ Operator op = genPlan(qbSQ);
+ return op;
+ }
+
+ @SuppressWarnings("nls")
+ private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input,
+ Map<String, Operator> aliasToOpInfo,
+ boolean forHavingClause, boolean forGroupByClause)
+ throws SemanticException {
+
+ OpParseContext inputCtx = opParseCtx.get(input);
+ RowResolver inputRR = inputCtx.getRowResolver();
+
+ /*
+ * Handling of SubQuery Expressions:
+ * if "Where clause contains no SubQuery expressions" then
+ * -->[true] ===CONTINUE_FILTER_PROCESSING===
+ * else
+ * -->[false] "extract SubQuery expressions\n from Where clause"
+ * if "this is a nested SubQuery or \nthere are more than 1 SubQuery expressions" then
+ * -->[yes] "throw Unsupported Error"
+ * else
+ * --> "Rewrite Search condition to \nremove SubQuery predicate"
+ * --> "build QBSubQuery"
+ * --> "extract correlated predicates \nfrom Where Clause"
+ * --> "add correlated Items to \nSelect List and Group By"
+ * --> "construct Join Predicate \nfrom correlation predicates"
+ * --> "Generate Plan for\n modified SubQuery"
+ * --> "Build the Join Condition\n for Parent Query to SubQuery join"
+ * --> "Build the QBJoinTree from the Join condition"
+ * --> "Update Parent Query Filter\n with any Post Join conditions"
+ * --> ===CONTINUE_FILTER_PROCESSING===
+ * endif
+ * endif
+ *
+ * Support for Sub Queries in Having Clause:
+ * - By and large this works the same way as SubQueries in the Where Clause.
+ * - The one addendum is the handling of aggregation expressions from the Outer Query
+ * appearing in correlation clauses.
+ * - So such correlating predicates are allowed:
+ * min(OuterQuery.x) = SubQuery.y
+ * - this requires special handling when converting to joins. See the QBSubQuery.rewrite
+ * method for detailed comments.
+ */
+ List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond);
+
+ if ( subQueriesInOriginalTree.size() > 0 ) {
+
+ /*
+ * Restriction.9.m :: disallow nested SubQuery expressions.
+ */
+ if (qb.getSubQueryPredicateDef() != null ) {
+ throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+ subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported."));
+ }
+
+ /*
+ * Restriction.8.m :: We allow only 1 SubQuery expression per Query.
+ */
+ if (subQueriesInOriginalTree.size() > 1 ) {
+
+ throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+ subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported."));
+ }
+
+ /*
+ * Clone the Search AST; apply all rewrites on the clone.
+ */
+ ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond);
+ List<ASTNode> subQueries = SubQueryUtils.findSubQueries(clonedSearchCond);
+
+ for(int i=0; i < subQueries.size(); i++) {
+ ASTNode subQueryAST = subQueries.get(i);
+ ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i);
+
+ int sqIdx = qb.incrNumSubQueryPredicates();
+ clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST);
+
+ QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(),
+ sqIdx, subQueryAST, originalSubQueryAST, ctx);
+
+ if ( !forHavingClause ) {
+ qb.setWhereClauseSubQueryPredicate(subQuery);
+ } else {
+ qb.setHavingClauseSubQueryPredicate(subQuery);
+ }
+ String havingInputAlias = null;
+
+ if ( forHavingClause ) {
+ havingInputAlias = "gby_sq" + sqIdx;
+ aliasToOpInfo.put(havingInputAlias, input);
+ }
+
+ subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias, aliasToOpInfo.keySet());
+
+ QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true);
+ Operator sqPlanTopOp = genPlanForSubQueryPredicate(qbSQ, subQuery);
+ aliasToOpInfo.put(subQuery.getAlias(), sqPlanTopOp);
+ RowResolver sqRR = opParseCtx.get(sqPlanTopOp).getRowResolver();
+
+ /*
+ * Check.5.h :: For In and Not In the SubQuery must implicitly or
+ * explicitly only contain one select item.
+ */
+ if ( subQuery.getOperator().getType() != SubQueryType.EXISTS &&
+ subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS &&
+ sqRR.getColumnInfos().size() -
+ subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1 ) {
+ throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
+ subQueryAST, "SubQuery can contain only 1 item in Select List."));
+ }
+
+ /*
+ * If this is a Not In SubQuery Predicate then Join in the Null Check SubQuery.
+ * See QBSubQuery.NotInCheck for details on why and how this is constructed.
+ */
+ if ( subQuery.getNotInCheck() != null ) {
+ QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck();
+ notInCheck.setSQRR(sqRR);
+ QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true);
+ Operator sqnicPlanTopOp = genPlanForSubQueryPredicate(qbSQ_nic, notInCheck);
+ aliasToOpInfo.put(notInCheck.getAlias(), sqnicPlanTopOp);
+ QBJoinTree joinTree_nic = genSQJoinTree(qb, notInCheck,
+ input,
+ aliasToOpInfo);
+ pushJoinFilters(qb, joinTree_nic, aliasToOpInfo, false);
+ input = genJoinOperator(qbSQ_nic, joinTree_nic, aliasToOpInfo, input);
+ inputRR = opParseCtx.get(input).getRowResolver();
+ if ( forHavingClause ) {
+ aliasToOpInfo.put(havingInputAlias, input);
+ }
+ }
+
+ /*
+ * Gen Join between outer Operator and SQ op
+ */
+ subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias);
+ QBJoinTree joinTree = genSQJoinTree(qb, subQuery,
+ input,
+ aliasToOpInfo);
+ /*
+ * push filters only for this QBJoinTree. Child QBJoinTrees have already been handled.
+ */
+ pushJoinFilters(qb, joinTree, aliasToOpInfo, false);
+ input = genJoinOperator(qbSQ, joinTree, aliasToOpInfo, input);
+ searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond);
+ }
+ }
+
+ return genFilterPlan(qb, searchCond, input, forHavingClause || forGroupByClause);
+ }
+
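+ /*
+ * An illustrative example of the rewrite performed above (the exact plan
+ * shape is decided by QBSubQuery and genJoinOperator, not by this comment).
+ * For a query such as
+ *
+ * SELECT * FROM src a WHERE a.key IN (SELECT b.key FROM src2 b)
+ *
+ * the IN predicate is removed from the cloned search condition, a plan is
+ * generated for the SubQuery block, the outer query is joined to that plan
+ * on the correlation/equality condition, and whatever remains of the search
+ * condition is applied by the regular filter path below.
+ */
+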
+ /**
+ * create a filter plan. The condition and the inputs are specified.
+ *
+ * @param qb
+ * current query block
+ * @param condn
+ * The condition to be resolved
+ * @param input
+ * the input operator
+ */
+ @SuppressWarnings("nls")
+ private Operator genFilterPlan(QB qb, ASTNode condn, Operator input, boolean useCaching)
+ throws SemanticException {
+
+ OpParseContext inputCtx = opParseCtx.get(input);
+ RowResolver inputRR = inputCtx.getRowResolver();
+
+ ExprNodeDesc filterCond = genExprNodeDesc(condn, inputRR, useCaching, isCBOExecuted());
+ if (filterCond instanceof ExprNodeConstantDesc) {
+ ExprNodeConstantDesc c = (ExprNodeConstantDesc) filterCond;
+ if (Boolean.TRUE.equals(c.getValue())) {
+ // If filter condition is TRUE, we ignore it
+ return input;
+ }
+ if (ExprNodeDescUtils.isNullConstant(c)) {
+ // If filter condition is NULL, transform to FALSE
+ filterCond = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, false);
+ }
+ }
+
+ Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
+ new FilterDesc(filterCond, false), new RowSchema(
+ inputRR.getColumnInfos()), input), inputRR);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Created Filter Plan for " + qb.getId() + " row schema: "
+ + inputRR.toString());
+ }
+ return output;
+ }
+
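+ /*
+ * A minimal sketch of the constant-folding behaviour above, using a plain
+ * Boolean in place of ExprNodeConstantDesc; the helper is hypothetical and
+ * unused, it only spells out the three cases.
+ */
+ private static String foldConstantFilterSketch(Boolean literal, boolean isNullLiteral) {
+ if (Boolean.TRUE.equals(literal)) {
+ return "drop filter"; // a TRUE predicate keeps every row, so no Filter operator is added
+ }
+ if (isNullLiteral) {
+ return "filter on FALSE"; // NULL rejects the row in SQL, so it is tightened to constant FALSE
+ }
+ return "filter on the condition"; // anything else gets a regular Filter operator
+ }
+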
+ /*
+ * For inner joins, push an 'is not null' predicate to the join sources for
+ * every join key that is not null-safe.
+ */
+ private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input,
+ QBJoinTree joinTree, ExprNodeDesc[] joinKeys) throws SemanticException {
+
+ if (qb == null || joinTree == null) {
+ return input;
+ }
+
+ if (!joinTree.getNoOuterJoin()) {
+ return input;
+ }
+
+ if (joinKeys == null || joinKeys.length == 0) {
+ return input;
+ }
+ Map<Integer, ExprNodeDesc> hashes = new HashMap<Integer, ExprNodeDesc>();
+ if (input instanceof FilterOperator) {
+ ExprNodeDescUtils.getExprNodeColumnDesc(Arrays.asList(((FilterDesc)input.getConf()).getPredicate()), hashes);
+ }
+ ExprNodeDesc filterPred = null;
+ List<Boolean> nullSafes = joinTree.getNullSafes();
+ for (int i = 0; i < joinKeys.length; i++) {
+ if (nullSafes.get(i) || (joinKeys[i] instanceof ExprNodeColumnDesc &&
+ ((ExprNodeColumnDesc)joinKeys[i]).getIsPartitionColOrVirtualCol())) {
+ // no need to generate is not null predicate for partitioning or
+ // virtual column, since those columns can never be null.
+ continue;
+ }
+ if(null != hashes.get(joinKeys[i].hashCode())) {
+ // there is already a predicate on this src.
+ continue;
+ }
+ List<ExprNodeDesc> args = new ArrayList<ExprNodeDesc>();
+ args.add(joinKeys[i]);
+ ExprNodeDesc nextExpr = ExprNodeGenericFuncDesc.newInstance(
+ FunctionRegistry.getFunctionInfo("isnotnull").getGenericUDF(), args);
+ filterPred = filterPred == null ? nextExpr : ExprNodeDescUtils
+ .mergePredicates(filterPred, nextExpr);
+ }
+
+ if (filterPred == null) {
+ return input;
+ }
+
+ OpParseContext inputCtx = opParseCtx.get(input);
+ RowResolver inputRR = inputCtx.getRowResolver();
+
+ if (input instanceof FilterOperator) {
+ FilterOperator f = (FilterOperator) input;
+ List<ExprNodeDesc> preds = new ArrayList<ExprNodeDesc>();
+ preds.add(f.getConf().getPredicate());
+ preds.add(filterPred);
+ f.getConf().setPredicate(ExprNodeDescUtils.mergePredicates(preds));
+
+ return input;
+ }
+
+ FilterDesc filterDesc = new FilterDesc(filterPred, false);
+ filterDesc.setGenerated(true);
+ Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(filterDesc,
+ new RowSchema(inputRR.getColumnInfos()), input), inputRR);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Created Filter Plan for " + qb.getId() + " row schema: "
+ + inputRR.toString());
+ }
+ return output;
+ }
+
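+ /*
+ * A small sketch (hypothetical helper, not used by the analyzer) of the
+ * predicate built above: for each inner-join key that is not null-safe and
+ * not a partition/virtual column, an "is not null" term is added and the
+ * terms are AND-ed together, e.g. "k1 is not null and k2 is not null".
+ */
+ private static String buildNotNullPredicateSketch(List<String> keyNames, List<Boolean> nullSafes) {
+ StringBuilder pred = new StringBuilder();
+ for (int i = 0; i < keyNames.size(); i++) {
+ if (nullSafes.get(i)) {
+ continue; // a null-safe (<=>) comparison matches NULLs, so no filter is added for it
+ }
+ if (pred.length() > 0) {
+ pred.append(" and ");
+ }
+ pred.append(keyNames.get(i)).append(" is not null");
+ }
+ return pred.length() == 0 ? null : pred.toString(); // null stands for "no extra filter needed"
+ }
+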
+ @SuppressWarnings("nls")
+ // TODO: make aliases unique, otherwise needless rewriting takes place
+ Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel,
+ ArrayList<ExprNodeDesc> col_list, HashSet<ColumnInfo> excludeCols, RowResolver input,
+ RowResolver colSrcRR, Integer pos, RowResolver output, List<String> aliases,
+ boolean ensureUniqueCols) throws SemanticException {
+
+ if (colSrcRR == null) {
+ colSrcRR = input;
+ }
+ // The table alias should exist
+ if (tabAlias != null && !colSrcRR.hasTableAlias(tabAlias)) {
+ throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(sel));
+ }
+
+ // TODO: Have to put in the support for AS clause
+ Pattern regex = null;
+ try {
+ regex = Pattern.compile(colRegex, Pattern.CASE_INSENSITIVE);
+ } catch (PatternSyntaxException e) {
+ throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel, e
+ .getMessage()));
+ }
+
+ StringBuilder replacementText = new StringBuilder();
+ int matched = 0;
+ // add empty string to the list of aliases. Some operators (ex. GroupBy) add
+ // ColumnInfos for table alias "".
+ if (!aliases.contains("")) {
+ aliases.add("");
+ }
+ /*
+ * track the input ColumnInfos that are added to the output.
+ * if a columnInfo has multiple mappings; then add the column only once,
+ * but carry the mappings forward.
+ */
+ Map<ColumnInfo, ColumnInfo> inputColsProcessed = new HashMap<ColumnInfo, ColumnInfo>();
+ // For expr "*", aliases should be iterated in the order they are specified
+ // in the query.
+ for (String alias : aliases) {
+ HashMap<String, ColumnInfo> fMap = colSrcRR.getFieldMap(alias);
+ if (fMap == null) {
+ continue;
+ }
+ // For the tab.* case, add all the columns to the fieldList
+ // from the input schema
+ for (Map.Entry<String, ColumnInfo> entry : fMap.entrySet()) {
+ ColumnInfo colInfo = entry.getValue();
+ if (excludeCols != null && excludeCols.contains(colInfo)) {
+ continue; // This was added during plan generation.
+ }
+ // First, look up the column from the source against which * is to be resolved.
+ // We'd later translate this into the column from the proper input, if it's valid.
+ // TODO: excludeCols may be possible to remove using the same technique.
+ String name = colInfo.getInternalName();
+ String[] tmp = colSrcRR.reverseLookup(name);
+
+ // Skip the colinfos which are not for this particular alias
+ if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) {
+ continue;
+ }
+
+ if (colInfo.getIsVirtualCol() && colInfo.isHiddenVirtualCol()) {
+ continue;
+ }
+
+ // Not matching the regex?
+ if (!regex.matcher(tmp[1]).matches()) {
+ continue;
+ }
+
+ // If input (GBY) is different than the source of columns, find the same column in input.
+ // TODO: This is fraught with peril.
+ if (input != colSrcRR) {
+ colInfo = input.get(tabAlias, tmp[1]);
+ if (colInfo == null) {
+ LOG.error("Cannot find colInfo for " + tabAlias + "." + tmp[1]
+ + ", derived from [" + colSrcRR + "], in [" + input + "]");
+ throw new SemanticException(ErrorMsg.NON_KEY_EXPR_IN_GROUPBY, tmp[1]);
+ }
+ String oldCol = null;
+ if (LOG.isDebugEnabled()) {
+ oldCol = name + " => " + (tmp == null ? "null" : (tmp[0] + "." + tmp[1]));
+ }
+ name = colInfo.getInternalName();
+ tmp = input.reverseLookup(name);
+ if (LOG.isDebugEnabled()) {
+ String newCol = name + " => " + (tmp == null ? "null" : (tmp[0] + "." + tmp[1]));
+ LOG.debug("Translated [" + oldCol + "] to [" + newCol + "]");
+ }
+ }
+
+ ColumnInfo oColInfo = inputColsProcessed.get(colInfo);
+ if (oColInfo == null) {
+ ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(),
+ name, colInfo.getTabAlias(), colInfo.getIsVirtualCol(),
+ colInfo.isSkewedCol());
+ col_list.add(expr);
+ oColInfo = new ColumnInfo(getColumnInternalName(pos),
+ colInfo.getType(), colInfo.getTabAlias(),
+ colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
+ inputColsProcessed.put(colInfo, oColInfo);
+ }
+ if (ensureUniqueCols) {
+ if (!output.putWithCheck(tmp[0], tmp[1], null, oColInfo)) {
+ throw new CalciteSemanticException("Cannot add column to RR: " + tmp[0] + "." + tmp[1]
+ + " => " + oColInfo + " due to duplication, see previous warnings",
+ UnsupportedFeature.Duplicates_in_RR);
+ }
+ } else {
+ output.put(tmp[0], tmp[1], oColInfo);
+ }
+ pos = Integer.valueOf(pos.intValue() + 1);
+ matched++;
+
+ if (unparseTranslator.isEnabled() || tableMask.isEnabled()) {
+ if (replacementText.length() > 0) {
+ replacementText.append(", ");
+ }
+ replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf));
+ replacementText.append(".");
+ replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf));
+ }
+ }
+ }
+ if (matched == 0) {
+ throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel));
+ }
+
+ if (unparseTranslator.isEnabled()) {
+ unparseTranslator.addTranslation(sel, replacementText.toString());
+ } else if (tableMask.isEnabled()) {
+ tableMask.addTranslation(sel, replacementText.toString());
+ }
+ return pos;
+ }
+
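+ /*
+ * A self-contained sketch of the column-regex expansion above: the pattern
+ * is compiled case-insensitively and matched against unqualified column
+ * names. The helper and its inputs are illustrative only; the real method
+ * also handles alias resolution, hidden virtual columns and unparse
+ * translation.
+ */
+ private static List<String> expandColumnRegexSketch(String colRegex, List<String> columnNames) {
+ Pattern regex = Pattern.compile(colRegex, Pattern.CASE_INSENSITIVE); // may throw PatternSyntaxException
+ List<String> matchedCols = new ArrayList<String>();
+ for (String col : columnNames) {
+ if (regex.matcher(col).matches()) {
+ matchedCols.add(col);
+ }
+ }
+ return matchedCols;
+ }
+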
+ public static String getColumnInternalName(int pos) {
+ return HiveConf.getColumnInternalName(pos);
+ }
+
+ private String getScriptProgName(String cmd) {
+ int end = cmd.indexOf(" ");
+ return (end == -1) ? cmd : cmd.substring(0, end);
+ }
+
+ private String getScriptArgs(String cmd) {
+ int end = cmd.indexOf(" ");
+ return (end == -1) ? "" : cmd.substring(end, cmd.length());
+ }
+
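+ /*
+ * Usage sketch for the two helpers above; the command string is an
+ * assumption used purely for illustration. The program name is everything
+ * before the first space, and the argument string keeps its leading space,
+ * so concatenating the two pieces reconstructs the original command.
+ */
+ private void scriptCommandSplitExampleSketch() {
+ String cmd = "python my_script.py arg1"; // hypothetical TRANSFORM command
+ String prog = getScriptProgName(cmd); // "python"
+ String args = getScriptArgs(cmd); // " my_script.py arg1"
+ assert cmd.equals(prog + args); // the split is lossless
+ }
+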
+ static int getPositionFromInternalName(String internalName) {
+ return HiveConf.getPositionFromInternalName(internalName);
+ }
+
+ private String fetchFilesNotInLocalFilesystem(String cmd) {
+ SessionState ss = SessionState.get();
+ String progName = getScriptProgName(cmd);
+
+ if (!ResourceDownloader.isFileUri(progName)) {
+ String filePath = ss.add_resource(ResourceType.FILE, progName, true);
+ Path p = new Path(filePath);
+ String fileName = p.getName();
+ String scriptArgs = getScriptArgs(cmd);
+ String finalCmd = fileName + scriptArgs;
+ return finalCmd;
+ }
+
+ return cmd;
+ }
+
+ private TableDesc getTableDescFromSerDe(ASTNode child, String cols,
+ String colTypes, boolean defaultCols) throws SemanticException {
+ if (child.getType() == HiveParser.TOK_SERDENAME) {
+ String serdeName = unescapeSQLString(child.getChild(0).getText());
+ Class<? extends Deserializer> serdeClass = null;
+
+ try {
+ serdeClass = (Class<? extends Deserializer>) Class.forName(serdeName,
+ true, Utilities.getSessionSpecifiedClassLoader());
+ } catch (ClassNotFoundException e) {
+ throw new SemanticException(e);
+ }
+
+ TableDesc tblDesc = PlanUtils.getTableDesc(serdeClass, Integer
+ .toString(Utilities.tabCode), cols, colTypes, defaultCols);
+ // copy all the properties
+ if (child.getChildCount() == 2) {
+ ASTNode prop = (ASTNode) ((ASTNode) child.getChild(1)).getChild(0);
+ for (int propChild = 0; propChild < prop.getChildCount(); propChild++) {
+ String key = unescapeSQLString(prop.getChild(propChild).getChild(0)
+ .getText());
+ String value = unescapeSQLString(prop.getChild(propChild).getChild(1)
+ .getText());
+ tblDesc.getProperties().setProperty(key, value);
+ }
+ }
+ return tblDesc;
+ } else if (child.getType() == HiveParser.TOK_SERDEPROPS) {
+ TableDesc tblDesc = PlanUtils.getDefaultTableDesc(Integer
+ .toString(Utilities.ctrlaCode), cols, colTypes, defaultCols);
+ int numChildRowFormat = child.getChildCount();
+ for (int numC = 0; numC < numChildRowFormat; numC++) {
+ ASTNode rowChild = (ASTNode) child.getChild(numC);
+ switch (rowChild.getToken().getType()) {
+ case HiveParser.TOK_TABLEROWFORMATFIELD:
+ String fieldDelim = unescapeSQLString(rowChild.getChild(0).getText());
+ tblDesc.getProperties()
+ .setProperty(serdeConstants.FIELD_DELIM, fieldDelim);
+ tblDesc.getProperties().setProperty(serdeConstants.SERIALIZATION_FORMAT,
+ fieldDelim);
+
+ if (rowChild.getChildCount() >= 2) {
+ String fieldEscape = unescapeSQLString(rowChild.getChild(1)
+ .getText());
+ tblDesc.getProperties().setProperty(serdeConstants.ESCAPE_CHAR,
+ fieldEscape);
+ }
+ break;
+ case HiveParser.TOK_TABLEROWFORMATCOLLITEMS:
+ tblDesc.getProperties().setProperty(serdeConstants.COLLECTION_DELIM,
+ unescapeSQLString(rowChild.getChild(0).getText()));
+ break;
+ case HiveParser.TOK_TABLEROWFORMATMAPKEYS:
+ tblDesc.getProperties().setProperty(serdeConstants.MAPKEY_DELIM,
+ unescapeSQLString(rowChild.getChild(0).getText()));
+ break;
+ case HiveParser.TOK_TABLEROWFORMATLINES:
+ String lineDelim = unescapeSQLString(rowChild.getChild(0).getText());
+ tblDesc.getProperties().setProperty(serdeConstants.LINE_DELIM, lineDelim);
+ if (!lineDelim.equals("\n") && !lineDelim.equals("10")) {
+ throw new SemanticException(generateErrorMessage(rowChild,
+ ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg()));
+ }
+ break;
+ case HiveParser.TOK_TABLEROWFORMATNULL:
+ String nullFormat = unescapeSQLString(rowChild.getChild(0).getText());
+ tblDesc.getProperties().setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT,
+ nullFormat);
+ break;
+ default:
+ assert false;
+ }
+ }
+
+ return tblDesc;
+ }
+
+ // should never come here
+ return null;
+ }
+
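+ /*
+ * A minimal sketch of the property mapping performed above for a
+ * ROW FORMAT DELIMITED specification; the delimiter values are assumptions
+ * chosen for illustration. Each clause simply becomes a serde property on
+ * the generated TableDesc.
+ */
+ private static java.util.Properties rowFormatPropertiesSketch() {
+ java.util.Properties props = new java.util.Properties();
+ props.setProperty(serdeConstants.FIELD_DELIM, ","); // FIELDS TERMINATED BY ','
+ props.setProperty(serdeConstants.SERIALIZATION_FORMAT, ","); // kept in sync with the field delimiter
+ props.setProperty(serdeConstants.COLLECTION_DELIM, "|"); // COLLECTION ITEMS TERMINATED BY '|'
+ props.setProperty(serdeConstants.MAPKEY_DELIM, ":"); // MAP KEYS TERMINATED BY ':'
+ props.setProperty(serdeConstants.LINE_DELIM, "\n"); // LINES TERMINATED BY '\n' (only newline is accepted)
+ return props;
+ }
+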
+ private void failIfColAliasExists(Set<String> nameSet, String name)
+ throws SemanticException {
+ if (nameSet.contains(name)) {
+ throw new SemanticException(ErrorMsg.COLUMN_ALIAS_ALREADY_EXISTS
+ .getMsg(name));
+ }
+ nameSet.add(name);
+ }
+
+ @SuppressWarnings("nls")
+ private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input)
+ throws SemanticException {
+ // If there is no "AS" clause, the output schema will be "key,value"
+ ArrayList<ColumnInfo> outputCols = new ArrayList<ColumnInfo>();
+ int inputSerDeNum = 1, inputRecordWriterNum = 2;
+ int outputSerDeNum = 4, outputRecordReaderNum = 5;
+ int outputColsNum = 6;
+ boolean outputColNames = false, outputColSchemas = false;
+ int execPos = 3;
+ boolean defaultOutputCols = false;
+
+ // Go over all the children
+ if (trfm.getChildCount() > outputColsNum) {
+ ASTNode outCols = (ASTNode) trfm.getChild(outputColsNum);
+ if (outCols.getType() == HiveParser.TOK_ALIASLIST) {
+ outputColNames = true;
+ } else if (outCols.getType() == HiveParser.TOK_TABCOLLIST) {
+ outputColSchemas = true;
+ }
+ }
+
+ // If column type is not specified, use a string
+ if (!outputColNames && !outputColSchemas) {
+ String intName = getColumnInternalName(0);
+ ColumnInfo colInfo = new ColumnInfo(intName,
+ TypeInfoFactory.stringTypeInfo, null, false);
+ colInfo.setAlias("key");
+ outputCols.add(colInfo);
+ intName = getColumnInternalName(1);
+ colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null,
+ false);
+ colInfo.setAlias("value");
+ outputCols.add(colInfo);
+ defaultOutputCols = true;
+ } else {
+ ASTNode collist = (ASTNode) trfm.getChild(outputColsNum);
+ int ccount = collist.getChildCount();
+
+ Set<String> colAliasNamesDuplicateCheck = new HashSet<String>();
+ if (outputColNames) {
+ for (int i = 0; i < ccount; ++i) {
+ String colAlias = unescapeIdentifier(((ASTNode) collist.getChild(i))
+ .getText());
+ failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
+ String intName = getColumnInternalName(i);
+ ColumnInfo colInfo = new ColumnInfo(intName,
+ TypeInfoFactory.stringTypeInfo, null, false);
+ colInfo.setAlias(colAlias);
+ outputCols.add(colInfo);
+ }
+ } else {
+ for (int i = 0; i < ccount; ++i) {
+ ASTNode child = (ASTNode) collist.getChild(i);
+ assert child.getType() == HiveParser.TOK_TABCOL;
+ String colAlias = unescapeIdentifier(((ASTNode) child.getChild(0))
+ .getText());
+ failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
+ String intName = getColumnInternalName(i);
+ ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoUtils
+ .getTypeInfoFromTypeString(getTypeStringFromAST((ASTNode) child
+ .getChild(1))), null, false);
+ colInfo.setAlias(colAlias);
+ outputCols.add(colInfo);
+ }
+ }
+ }
+
+ RowResolver out_rwsch = new RowResolver();
+ StringBuilder columns = new StringBuilder();
+ StringBuilder columnTypes = new StringBuilder();
+
+ for (int i = 0; i < outputCols.size(); ++i) {
+ if (i != 0) {
+ columns.append(",");
+ columnTypes.append(",");
+ }
+
+ columns.append(outputCols.get(i).getInternalName());
+ columnTypes.append(outputCols.get(i).getType().getTypeName());
+
+ out_rwsch.put(qb.getParseInfo().getAlias(), outputCols.get(i).getAlias(),
+ outputCols.get(i));
+ }
+
+ StringBuilder inpColumns = new StringBuilder();
+ StringBuilder inpColumnTypes = new StringBuilder();
+ ArrayList<ColumnInfo> inputSchema = opParseCtx.get(input).getRowResolver()
+ .getColumnInfos();
+ for (int i = 0; i < inputSchema.size(); ++i) {
+ if (i != 0) {
+ inpColumns.append(",");
+ inpColumnTypes.append(",");
+ }
+
+ inpColumns.append(inputSchema.get(i).getInternalName());
+ inpColumnTypes.append(inputSchema.get(i).getType().getTypeName());
+ }
+
+ TableDesc outInfo;
+ TableDesc errInfo;
+ TableDesc inInfo;
+ String defaultSerdeName = conf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE);
+ Class<? extends Deserializer> serde;
+
+ try {
+ serde = (Class<? extends Deserializer>) Class.forName(defaultSerdeName,
+ true, Utilities.getSessionSpecifiedClassLoader());
+ } catch (ClassNotFoundException e) {
+ throw new SemanticException(e);
+ }
+
+ int fieldSeparator = Utilities.tabCode;
+ if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESCRIPTESCAPE)) {
+ fieldSeparator = Utilities.ctrlaCode;
+ }
+
+ // Input and Output Serdes
+ if (trfm.getChild(inputSerDeNum).getChildCount() > 0) {
+ inInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm
+ .getChild(inputSerDeNum))).getChild(0), inpColumns.toString(),
+ inpColumnTypes.toString(), false);
+ } else {
+ inInfo = PlanUtils.getTableDesc(serde, Integer
+ .toString(fieldSeparator), inpColumns.toString(), inpColumnTypes
+ .toString(), false, true);
+ }
+
+ if (trfm.getChild(outputSerDeNum).getChildCount() > 0) {
+ outInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm
+ .getChild(outputSerDeNum))).getChild(0), columns.toString(),
+ columnTypes.toString(), false);
+ // This is for backward compatibility. If the user did not specify the
+ // output column list, we assume that there are 2 columns: key and value.
+ // However, if the script outputs: col1, col2, col3 separated by TAB, the
+ // requirement is: key is col1 and value is (col2 TAB col3)
+ } else {
+ outInfo = PlanUtils.getTableDesc(serde, Integer
+ .toString(fieldSeparator), columns.toString(), columnTypes
+ .toString(), defaultOutputCols);
+ }
+
+ // Error stream always uses the default serde with a single column
+ errInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), "KEY");
+
+ // Output record readers
+ Class<? extends RecordReader> outRecordReader = getRecordReader((ASTNode) trfm
+ .getChild(outputRecordReaderNum));
+ Class<? extends RecordWriter> inRecordWriter = getRecordWriter((ASTNode) trfm
+ .getChild(inputRecordWriterNum));
+ Class<? extends RecordReader> errRecordReader = getDefaultRecordReader();
+
+ Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(new ScriptDesc(
+ fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())),
+ inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo),
+ new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
+ output.setColumnExprMap(new HashMap<String, ExprNodeDesc>()); // disable backtracking
+
+ // Add URI entity for the transform script. The script is assumed to be local unless it is downloadable.
+ if (conf.getBoolVar(ConfVars.HIVE_CAPTURE_TRANSFORM_ENTITY)) {
+ String scriptCmd = getScriptProgName(stripQuotes(trfm.getChild(execPos).getText()));
+ getInputs().add(new ReadEntity(new Path(scriptCmd),
+ ResourceDownloader.isFileUri(scriptCmd)));
+ }
+
+ return output;
+ }
+
+ private Class<? extends RecordReader> getRecordReader(ASTNode node)
+ throws SemanticException {
+ String name;
+
+ if (node.getChildCount() == 0) {
+ name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDREADER);
+ } else {
+ name = unescapeSQLString(node.getChild(0).getText());
+ }
+
+ try {
+ return (Class<? extends RecordReader>) Class.forName(name, true,
+ Utilities.getSessionSpecifiedClassLoader());
+ } catch (ClassNotFoundException e) {
+ throw new SemanticException(e);
+ }
+ }
+
+ private Class<? extends RecordReader> getDefaultRecordReader()
+ throws SemanticException {
+ String name;
+
+ name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDREADER);
+
+ try {
+ return (Class<? extends RecordReader>) Class.forName(name, true,
+ Utilities.getSessionSpecifiedClassLoader());
+ } catch (ClassNotFoundException e) {
+ throw new SemanticException(e);
+ }
+ }
+
+ private Class<? extends RecordWriter> getRecordWriter(ASTNode node)
+ throws SemanticException {
+ String name;
+
+ if (node.getChildCount() == 0) {
+ name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDWRITER);
+ } else {
+ name = unescapeSQLString(node.getChild(0).getText());
+ }
+
+ try {
+ return (Class<? extends RecordWriter>) Class.forName(name, true,
+ Utilities.getSessionSpecifiedClassLoader());
+ } catch (ClassNotFoundException e) {
+ throw new SemanticException(e);
+ }
+ }
+
+ protected List<Integer> getGroupingSetsForRollup(int size) {
+ List<Integer> groupingSetKeys = new ArrayList<Integer>();
+ for (int i = 0; i <= size; i++) {
+ groupingSetKeys.add((1 << i) - 1);
+ }
+ return groupingSetKeys;
+ }
+
+ protected List<Integer> getGroupingSetsForCube(int size) {
+ int count = 1 << size;
+ List<Integer> results = new ArrayList<Integer>(count);
+ for (int i = 0; i < count; ++i) {
+ results.add(i);
+ }
+ return results;
+ }
+
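+ /*
+ * Worked example of the bitmap encoding produced above for GROUP BY a, b, c
+ * (bit i corresponds to the i-th group-by expression): ROLLUP yields
+ * 0, 1, 3, 7 -> (), (a), (a, b), (a, b, c), while CUBE yields 0..7, i.e.
+ * every subset of {a, b, c}. The decoding helper below is hypothetical and
+ * only shows how such a bitmap maps back to the participating columns.
+ */
+ private static List<String> groupingSetColumnsSketch(int bitmap, List<String> groupByCols) {
+ List<String> present = new ArrayList<String>();
+ for (int i = 0; i < groupByCols.size(); i++) {
+ if ((bitmap & (1 << i)) != 0) { // bit i set => the i-th group-by column is part of this set
+ present.add(groupByCols.get(i));
+ }
+ }
+ return present;
+ }
+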
+ // This function returns the grouping sets along with the grouping expressions
+ // Even if rollups and cubes are present in the query, they are converted to
+ // grouping sets at this point
+ private ObjectPair<List<ASTNode>, List<Integer>> getGroupByGroupingSetsForClause(
+ QBParseInfo parseInfo, String dest) throws SemanticException {
+ List<Integer> groupingSets = new ArrayList<Integer>();
+ List<ASTNode> groupByExprs = getGroupByForClause(parseInfo, dest);
+ if (parseInfo.getDestRollups().contains(dest)) {
+ groupingSets = getGroupingSetsForRollup(groupByExprs.size());
+ } else if (parseInfo.getDestCubes().contains(dest)) {
+ groupingSets = getGroupingSetsForCube(groupByExprs.size());
+ } else if (parseInfo.getDestGroupingSets().contains(dest)) {
+ groupingSets = getGroupingSets(groupByExprs, parseInfo, dest);
+ }
+
+ return new ObjectPair<List<ASTNode>, List<Integer>>(groupByExprs, groupingSets);
+ }
+
+ protected List<Integer> getGroupingSets(List<ASTNode> groupByExpr, QBParseInfo parseInfo,
+ String dest) throws SemanticException {
+ Map<String, Integer> exprPos = new HashMap<String, Integer>();
+ for (int i = 0; i < groupByExpr.size(); ++i) {
+ ASTNode node = groupByExpr.get(i);
+ exprPos.put(node.toStringTree(), i);
+ }
+
+ ASTNode root = parseInfo.getGroupByForClause(dest);
+ List<Integer> result = new ArrayList<Integer>(root == null ? 0 : root.getChildCount());
+ if (root != null) {
+ for (int i = 0; i < root.getChildCount(); ++i) {
+ ASTNode child = (ASTNode) root.getChild(i);
+ if (child.getType() != HiveParser.TOK_GROUPING_SETS_EXPRESSION) {
+ continue;
+ }
+ int bitmap = 0;
+ for (int j = 0; j < child.getChildCount(); ++j) {
+ String treeAsString = child.getChild(j).toStringTree();
+ Integer pos = exprPos.get(treeAsString);
+ if (pos == null) {
+ throw new SemanticException(
+ generateErrorMessage((ASTNode) child.getChild(j),
+ ErrorMsg.HIVE_GROUPING_SETS_EXPR_NOT_IN_GROUPBY.getErrorCodedMsg()));
+ }
+ bitmap = setBit(bitmap, pos);
+ }
+ result.add(bitmap);
+ }
+ }
+ if (checkForNoAggr(result)) {
+ throw new SemanticException(
+ ErrorMsg.HIVE_GROUPING_SETS_AGGR_NOFUNC.getMsg());
+ }
+ return result;
+ }
+
+ private boolean checkForNoAggr(List<Integer> bitmaps) {
+ boolean ret = true;
+ for (int mask : bitmaps) {
+ ret &= mask == 0;
+ }
+ return ret;
+ }
+
+ public static int setBit(int bitmap, int bitIdx) {
+ return bitmap | (1 << bitIdx);
+ }
+
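+ /*
+ * Usage sketch for setBit: an explicit clause GROUPING SETS ((a), (a, b))
+ * over GROUP BY a, b is encoded as the bitmaps 1 and 3, since each listed
+ * expression sets the bit of its position in the GROUP BY list. The values
+ * below are assumptions used purely for illustration.
+ */
+ private static void setBitUsageSketch() {
+ int setA = setBit(0, 0); // (a) -> binary 01 -> 1
+ int setAB = setBit(setBit(0, 0), 1); // (a, b) -> binary 11 -> 3
+ assert setA == 1 && setAB == 3;
+ }
+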
+ /**
+ * This function is a wrapper of parseInfo.getGroupByForClause which
+ * automatically translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY
+ * a,b,c.
+ */
+ List<ASTNode> getGroupByForClause(QBParseInfo parseInfo, String dest) throws SemanticException {
+ if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
+ ASTNode selectExprs = parseInfo.getSelForClause(dest);
+ List<ASTNode> result = new ArrayList<ASTNode>(selectExprs == null ? 0
+ : selectExprs.getChildCount());
+ if (selectExprs != null) {
+ HashMap<String, ASTNode> windowingExprs = parseInfo.getWindowingExprsForClause(dest);
+
+ for (int i = 0; i < selectExprs.getChildCount(); ++i) {
+ if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == HiveParser.TOK_HINTLIST) {
+ continue;
+ }
+ // table.column AS alias
+ ASTNode grpbyExpr = (ASTNode) selectExprs.getChild(i).getChild(0);
+ /*
+ * If this is handled by Windowing then ignore it.
+ */
+ if (windowingExprs != null && windowingExprs.containsKey(grpbyExpr.toStringTree())) {
+ if (!isCBOExecuted()) {
+ throw new SemanticException("SELECT DISTINCT not allowed in the presence of windowing"
+ + " functions when CBO is off");
+ }
+ continue;
+ }
+ result.add(grpbyExpr);
+ }
+ }
+ return result;
+ } else {
+ ASTNode grpByExprs = parseInfo.getGroupByForClause(dest);
+ List<ASTNode> result = new ArrayList<ASTNode>(grpByExprs == null ? 0
+ : grpByExprs.getChildCount());
+ if (grpByExprs != null) {
+ for (int i = 0; i < grpByExprs.getChildCount(); ++i) {
+ ASTNode grpbyExpr = (ASTNode) grpByExprs.getChild(i);
+ if (grpbyExpr.getType() != HiveParser.TOK_GROUPING_SETS_EXPRESSION) {
+ result.add(grpbyExpr);
+ }
+ }
+ }
+ return result;
+ }
+ }
+
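+ /*
+ * Illustration of the DISTINCT handling above: SELECT DISTINCT a, b FROM t
+ * is treated as if it had been written SELECT a, b FROM t GROUP BY a, b,
+ * i.e. the select expressions themselves become the group-by expressions;
+ * hint lists are skipped and windowing expressions are left to the
+ * windowing code path.
+ */
+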
+ static String[] getColAlias(ASTNode selExpr, String defaultName,
+ RowResolver inputRR, boolean includeFuncName, int colNum) {
+ String colAlias = null;
+ String tabAlias = null;
+ String[] colRef = new String[2];
+
+ //for queries with a windowing expressions, the selexpr may have a third child
+ if (selExpr.getChildCount() == 2 ||
+ (selExpr.getChildCount() == 3 &&
+ selExpr.getChild(2).getType() == HiveParser.TOK_WINDOWSPEC)) {
+ // return zz for "xx + yy AS zz"
+ colAlias = unescapeIdentifier(selExpr.getChild(1).getText().toLowerCase());
+ colRef[0] = tabAlias;
+ colRef[1] = colAlias;
+ return colRef;
+ }
+
+ ASTNode root = (ASTNode) selExpr.getChild(0);
+ if (root.getType() == HiveParser.TOK_TABLE_OR_COL) {
+ colAlias =
+ BaseSemanticAnalyzer.unescapeIdentifier(root.getChild(0).getText().toLowerCase());
+ colRef[0] = tabAlias;
+ colRef[1] = colAlias;
+ return colRef;
+ }
+
+ if (root.getType() == HiveParser.DOT) {
+ ASTNode tab = (ASTNode) root.getChild(0);
+ if (tab.getType() == HiveParser.TOK_TABLE_OR_COL) {
+ String t = unescapeIdentifier(tab.getChild(0).getText());
+ if (inputRR.hasTableAlias(t)) {
+ tabAlias = t;
+ }
+ }
+
+ // Return zz for "xx.zz" and "xx.yy.zz"
+ ASTNode col = (ASTNode) root.getChild(1);
+ if (col.getType() == HiveParser.Identifier) {
+ colAlias = unescapeIdentifier(col.getText().toLowerCase());
+ }
+ }
+
+ // if specified generate alias using func name
+ if (includeFuncName && (root.getType() == HiveParser.TOK_FUNCTION)) {
+
+ String expr_flattened = root.toStringTree();
+
+ // remove all TOK tokens
+ String expr_no_tok = expr_flattened.replaceAll("tok_\\S+", "");
+
+ // remove all non-alphanumeric characters and replace each whitespace span with an underscore
+ String expr_formatted = expr_no_tok.replaceAll("\\W", " ").trim().replaceAll("\\s+", "_");
+
+ // limit length to 20 chars
+ if (expr_formatted.length() > AUTOGEN_COLALIAS_PRFX_MAXLENGTH) {
+ expr_formatted = expr_formatted.substring(0, AUTOGEN_COLALIAS_PRFX_MAXLENGTH);
+ }
+
+ // append colnum to make it unique
+ colAlias = expr_formatted.concat("_" + colNum);
+ }
+
+ if (colAlias == null) {
+ // Return defaultName if selExpr is not a simple xx.yy.zz
+ colAlias = defaultName + colNum;
+ }
+
+ colRef[0] = tabAlias;
+ colRef[1] = colAlias;
+ return colRef;
+ }
+
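+ /*
+ * A self-contained sketch of the alias auto-generation above for function
+ * expressions; the sample input and the explicit length parameter are
+ * assumptions for illustration (the real code uses
+ * AUTOGEN_COLALIAS_PRFX_MAXLENGTH). For example,
+ * "(tok_function count (tok_table_or_col key))" becomes "count_key", and
+ * the column position is appended to keep generated aliases unique.
+ */
+ private static String autogenAliasSketch(String flattenedExprTree, int colNum, int maxLen) {
+ String noTok = flattenedExprTree.replaceAll("tok_\\S+", ""); // drop parser token names
+ String formatted = noTok.replaceAll("\\W", " ").trim().replaceAll("\\s+", "_");
+ if (formatted.length() > maxLen) {
+ formatted = formatted.substring(0, maxLen); // keep only a short prefix
+ }
+ return formatted.concat("_" + colNum); // e.g. "count_key_1"
+ }
+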
+ /**
+ * Returns whether the pattern is a regex expression (instead of a normal
+ * string). A normal string consists only of letters, digits, and "_".
+ */
+ static boolean isRegex(String pattern, HiveConf conf) {
+ String qIdSupport = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_QUOTEDID_SUPPORT);
+ if ( "column".equals(qIdSupport)) {
+ return false;
+ }
+ for (int i = 0; i < pattern.length(); i++) {
+ if (!Character.isLetterOrDigit(pattern.charAt(i))
+ && pattern.charAt(i) != '_') {
+ return true;
+ }
+ }
+ return false;
+ }
+
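+ /*
+ * Usage sketch for isRegex; the column names are assumptions. "key_1"
+ * contains only letters, digits and '_' and is looked up literally, while
+ * "key.*" contains other characters and is treated as a regex, unless
+ * HIVE_QUOTEDID_SUPPORT is set to "column", in which case regex column
+ * specifications are disabled entirely.
+ */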
+
+ private Operator<?> genSelectPlan(String dest, QB qb, Operator<?> input,
+ Operator<?> inputForSelectStar) throws SemanticException {
+ ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
+ Operator<?> op = genSelectPlan(dest, selExprList, qb, input, inputForSelectStar, false);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Created Select Plan for clause: " + dest);
+ }
+
+ return op;
+ }
+
+ @SuppressWarnings("nls")
+ private Operator<?> genSelectPlan(String dest, ASTNode selExprList, QB qb, Operator<?> input,
+ Operator<?> inputForSelectStar, boolean outerLV) throws SemanticException {
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("tree: " + selExprList.toStringTree());
+ }
+
+ ArrayList<ExprNodeDesc> col_list = new ArrayList<ExprNodeDesc>();
+ RowResolver out_rwsch = new RowResolver();
+ ASTNode trfm = null;
+ Integer pos = Integer.valueOf(0);
+ RowResolver inputRR = opParseCtx.get(input).getRowResolver();
+ RowResolver starRR = null;
+ if (inputForSelectStar != null && inputForSelectStar != input) {
+ starRR = opParseCtx.get(inputForSelectStar).getRowResolver();
+ }
+ // SELECT * or SELECT TRANSFORM(*)
+ boolean selectStar = false;
+ int posn = 0;
+ boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.TOK_HINTLIST);
+ if (hintPresent) {
+ posn++;
+ }
+
+ boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() ==
+ HiveParser.TOK_TRANSFORM);
+ if (isInTransform) {
+ queryProperties.setUsesScript(true);
+ globalLimitCtx.setHasTransformOrUDTF(true);
+ trfm = (ASTNode) selExprList.getChild(posn).getChild(0);
+ }
+
+ // Detect queries of the form SELECT udtf(col) AS ...
+ // by looking for a function as the first child, and then checking to see
+ // if the function is a Generic UDTF. It's not as clean as TRANSFORM due to
+ // the lack of a special token.
+ boolean isUDTF = false;
+ String udtfTableAlias = null;
+ ArrayList